Add -march=iamcu to optimize for IA MCU
gcc/config/i386/i386.c
/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "target.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimplify.h"
#include "cfgloop.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "tree-chkp.h"
#include "rtl-chkp.h"

/* This file should be included last.  */
#include "target-def.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
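
/* For instance, MODE_INDEX (QImode) is 0, MODE_INDEX (SImode) is 2, and any
   mode not listed above falls through to 4, so the macro selects the QI, SI
   and "other" entries of the five-element multiply and divide cost arrays
   in the processor_costs tables below.  */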

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
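
/* A quick sanity check of the scale: with COSTS_N_INSNS (N) assumed to be
   (N) * 4, a one-insn add costs 4 units, and a 2-byte add likewise costs
   COSTS_N_BYTES (2) == 4, so the byte-based and insn-based scales stay
   comparable; the 3-byte lea entry in ix86_size_cost below works out to
   COSTS_N_BYTES (3) == 6.  */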

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}

static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
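
/* How to read these tables (a sketch of the usual interpretation; the field
   names are not spelled out in this excerpt): each stringop_algs value names
   the algorithm used when the block size is unknown at compile time, followed
   by {max, alg, noalign} entries that choose an algorithm for known sizes up
   to MAX bytes, a max of -1 acting as the catch-all.  The two array elements
   appear to cover 32-bit and 64-bit code respectively, which is why the
   32-bit-only CPUs below leave the second slot as DUMMY_STRINGOP_ALGS.  */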

const
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),  /* cost of an add instruction */
  COSTS_N_BYTES (3),  /* cost of a lea instruction */
  COSTS_N_BYTES (2),  /* variable shift costs */
  COSTS_N_BYTES (3),  /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),  /* cost of movsx */
  COSTS_N_BYTES (3),  /* cost of movzx */
  0,  /* "large" insn */
  2,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {2, 2, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 2},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {2, 2, 2},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  3,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {3, 3},  /* cost of storing MMX registers
              in SImode and DImode */
  3,  /* cost of moving SSE register */
  {3, 3, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {3, 3, 3},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_BYTES (2),  /* cost of FADD and FSUB insns. */
  COSTS_N_BYTES (2),  /* cost of FMUL instruction. */
  COSTS_N_BYTES (2),  /* cost of FDIV instruction. */
  COSTS_N_BYTES (2),  /* cost of FABS instruction. */
  COSTS_N_BYTES (2),  /* cost of FCHS instruction. */
  COSTS_N_BYTES (2),  /* cost of FSQRT instruction. */
  ix86_size_memcpy,
  ix86_size_memset,
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  1,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  1,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (22),  /* cost of FABS instruction. */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction. */
  i386_memcpy,
  i386_memset,
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)},  /* other */
  1,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  4,  /* size of l1 cache.  486 has 8kB cache
         shared for code and data, so 4kB is
         not really precise.  */
  4,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (3),  /* cost of FABS instruction. */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction. */
  i486_memcpy,
  i486_memset,
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  6,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  8,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  8,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (3),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),  /* cost of FABS instruction. */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction. */
  pentium_memcpy,
  pentium_memset,
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs iamcu_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  6,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  8,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  8,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (3),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),  /* cost of FABS instruction. */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction. */
  pentium_memcpy,
  pentium_memset,
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks inline loop is still a
   noticeable win, for bigger blocks either rep movsl or rep movsb is
   way to go.  Rep movsb has apparently more expensive startup time in CPU,
   but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
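
/* As a worked example of the thresholds described above: reading
   pentiumpro_memcpy, a known 64-byte copy falls in the {128, loop} bucket
   and is expanded as an inline loop, a 512-byte copy uses the unrolled loop,
   a 4K copy uses rep movsl ({8192, rep_prefix_4_byte}), and anything larger
   hits the {-1, rep_prefix_1_byte} catch-all, i.e. rep movsb; a copy whose
   size is unknown at compile time presumably uses the leading
   rep_prefix_4_byte default.  */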

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  256,  /* size of l2 cache */
  32,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction. */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (2),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  1,  /* cost for loading QImode using movzbl */
  {1, 1, 1},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {1, 1, 1},  /* cost of storing integer registers */
  1,  /* cost of reg,reg fld/fst */
  {1, 1, 1},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 6, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */

  1,  /* cost of moving MMX register */
  {1, 1},  /* cost of loading MMX registers
              in SImode and DImode */
  {1, 1},  /* cost of storing MMX registers
              in SImode and DImode */
  1,  /* cost of moving SSE register */
  {1, 1, 1},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {1, 1, 1},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  1,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache. */
  128,  /* size of l2 cache. */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),  /* cost of FABS instruction. */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction. */
  geode_memcpy,
  geode_memset,
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)},  /* other */
  COSTS_N_INSNS (2),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  3,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  6,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  32,  /* size of l2 cache.  Some models
          have integrated l2 cache, but
          optimizing for k6 is not important
          enough to worry about that.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (2),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (2),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction. */
  k6_memcpy,
  k6_memset,
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* For some reason, Athlon deals better with REP prefix (relative to loops)
   compared to K8.  Alignment becomes important after 8 bytes for memcpy and
   128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache. */
  256,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  5,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */
  athlon_memcpy,
  athlon_memset,
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* K8 has optimized REP instruction for medium sized blocks, but for very
   small blocks it is better to use loop.  For large blocks, libcall can
   do nontemporary accesses and beat inline considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
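
/* Unlike the 32-bit-only CPUs above, K8 fills in a real second stringop_algs
   entry: in 64-bit code a 4K memcpy would presumably land in the
   {8192, rep_prefix_8_byte} bucket, copies of up to 16 bytes stay with an
   inline loop, and very large or unknown sizes fall back to the libcall,
   matching the note above about nontemporal library code winning for large
   blocks.  */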

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 3, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache. */
  512,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */

  k8_memcpy,
  k8_memset,
  4,  /* scalar_stmt_cost. */
  2,  /* scalar load_cost. */
  2,  /* scalar_store_cost. */
  5,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  2,  /* vec_align_load_cost. */
  3,  /* vec_unalign_load_cost. */
  3,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  2,  /* cond_not_taken_branch_cost. */
};

/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall can
   do nontemporary accesses and beat inline considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
                         1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
                         1/1  1/1 */
  64,  /* size of l1 cache. */
  512,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */

  amdfam10_memcpy,
  amdfam10_memset,
  4,  /* scalar_stmt_cost. */
  2,  /* scalar load_cost. */
  2,  /* scalar_store_cost. */
  6,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  2,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  2,  /* vec_store_cost. */
  2,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* BDVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {5, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {5, 5, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 4},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
                         1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
                         1/1  1/1 */
  16,  /* size of l1 cache. */
  2048,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (6),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction. */

  bdver1_memcpy,
  bdver1_memset,
  6,  /* scalar_stmt_cost. */
  4,  /* scalar load_cost. */
  4,  /* scalar_store_cost. */
  6,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  4,  /* vec_align_load_cost. */
  4,  /* vec_unalign_load_cost. */
  4,  /* vec_store_cost. */
  4,  /* cond_taken_branch_cost. */
  2,  /* cond_not_taken_branch_cost. */
};

/* BDVER2 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */

static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {5, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {5, 5, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 4},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
                         1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
                         1/1  1/1 */
  16,  /* size of l1 cache. */
  2048,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (6),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction. */

  bdver2_memcpy,
  bdver2_memset,
  6,  /* scalar_stmt_cost. */
  4,  /* scalar load_cost. */
  4,  /* scalar_store_cost. */
  6,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  4,  /* vec_align_load_cost. */
  4,  /* vec_unalign_load_cost. */
  4,  /* vec_store_cost. */
  4,  /* cond_taken_branch_cost. */
  2,  /* cond_not_taken_branch_cost. */
};

/* BDVER3 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {5, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {5, 5, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 4},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  16,  /* size of l1 cache. */
  2048,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (6),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction. */

  bdver3_memcpy,
  bdver3_memset,
  6,  /* scalar_stmt_cost. */
  4,  /* scalar load_cost. */
  4,  /* scalar_store_cost. */
  6,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  4,  /* vec_align_load_cost. */
  4,  /* vec_unalign_load_cost. */
  4,  /* vec_store_cost. */
  4,  /* cond_taken_branch_cost. */
  2,  /* cond_not_taken_branch_cost. */
};

/* BDVER4 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall
   can do nontemporary accesses and beat inline considerably.  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver4_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {5, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {5, 5, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {4, 4, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 4},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  16,  /* size of l1 cache. */
  2048,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (6),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (42),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (52),  /* cost of FSQRT instruction. */

  bdver4_memcpy,
  bdver4_memset,
  6,  /* scalar_stmt_cost. */
  4,  /* scalar load_cost. */
  4,  /* scalar_store_cost. */
  6,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  4,  /* vec_align_load_cost. */
  4,  /* vec_unalign_load_cost. */
  4,  /* vec_store_cost. */
  4,  /* cond_taken_branch_cost. */
  2,  /* cond_not_taken_branch_cost. */
};

/* BTVER1 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop.  For large blocks, libcall can
   do nontemporary accesses and beat inline considerably.  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
                         1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
                         1/1  1/1 */
  32,  /* size of l1 cache. */
  512,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */

  btver1_memcpy,
  btver1_memset,
  4,  /* scalar_stmt_cost. */
  2,  /* scalar load_cost. */
  2,  /* scalar_store_cost. */
  6,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  2,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  2,  /* vec_store_cost. */
  2,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};
1443 static stringop_algs btver2_memcpy[2] = {
1444 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1445 {-1, rep_prefix_4_byte, false}}},
1446 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1447 {-1, libcall, false}}}};
1448 static stringop_algs btver2_memset[2] = {
1449 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1450 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1451 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1452 {-1, libcall, false}}}};
1453 const struct processor_costs btver2_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (2), /* cost of a lea instruction */
1456 COSTS_N_INSNS (1), /* variable shift costs */
1457 COSTS_N_INSNS (1), /* constant shift costs */
1458 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (4), /* HI */
1460 COSTS_N_INSNS (3), /* SI */
1461 COSTS_N_INSNS (4), /* DI */
1462 COSTS_N_INSNS (5)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (35), /* HI */
1466 COSTS_N_INSNS (51), /* SI */
1467 COSTS_N_INSNS (83), /* DI */
1468 COSTS_N_INSNS (83)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 8, /* "large" insn */
1472 9, /* MOVE_RATIO */
1473 4, /* cost for loading QImode using movzbl */
1474 {3, 4, 3}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {3, 4, 3}, /* cost of storing integer registers */
1478 4, /* cost of reg,reg fld/fst */
1479 {4, 4, 12}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {6, 6, 8}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {3, 3}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {4, 4}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 2, /* cost of moving SSE register */
1489 {4, 4, 3}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {4, 4, 5}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 3, /* MMX or SSE register to integer */
1494 /* On K8:
1495 MOVD reg64, xmmreg Double FSTORE 4
1496 MOVD reg32, xmmreg Double FSTORE 4
1497 On AMDFAM10:
1498 MOVD reg64, xmmreg Double FADD 3
1499 1/1 1/1
1500 MOVD reg32, xmmreg Double FADD 3
1501 1/1 1/1 */
1502 32, /* size of l1 cache. */
1503 2048, /* size of l2 cache. */
1504 64, /* size of prefetch block */
1505 100, /* number of parallel prefetches */
1506 2, /* Branch cost */
1507 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1508 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1509 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1510 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1511 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1512 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1513 btver2_memcpy,
1514 btver2_memset,
1515 4, /* scalar_stmt_cost. */
1516 2, /* scalar load_cost. */
1517 2, /* scalar_store_cost. */
1518 6, /* vec_stmt_cost. */
1519 0, /* vec_to_scalar_cost. */
1520 2, /* scalar_to_vec_cost. */
1521 2, /* vec_align_load_cost. */
1522 2, /* vec_unalign_load_cost. */
1523 2, /* vec_store_cost. */
1524 2, /* cond_taken_branch_cost. */
1525 1, /* cond_not_taken_branch_cost. */
1528 static stringop_algs pentium4_memcpy[2] = {
1529 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1530 DUMMY_STRINGOP_ALGS};
1531 static stringop_algs pentium4_memset[2] = {
1532 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1533 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1534 DUMMY_STRINGOP_ALGS};
1536 static const
1537 struct processor_costs pentium4_cost = {
1538 COSTS_N_INSNS (1), /* cost of an add instruction */
1539 COSTS_N_INSNS (3), /* cost of a lea instruction */
1540 COSTS_N_INSNS (4), /* variable shift costs */
1541 COSTS_N_INSNS (4), /* constant shift costs */
1542 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1543 COSTS_N_INSNS (15), /* HI */
1544 COSTS_N_INSNS (15), /* SI */
1545 COSTS_N_INSNS (15), /* DI */
1546 COSTS_N_INSNS (15)}, /* other */
1547 0, /* cost of multiply per each bit set */
1548 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1549 COSTS_N_INSNS (56), /* HI */
1550 COSTS_N_INSNS (56), /* SI */
1551 COSTS_N_INSNS (56), /* DI */
1552 COSTS_N_INSNS (56)}, /* other */
1553 COSTS_N_INSNS (1), /* cost of movsx */
1554 COSTS_N_INSNS (1), /* cost of movzx */
1555 16, /* "large" insn */
1556 6, /* MOVE_RATIO */
1557 2, /* cost for loading QImode using movzbl */
1558 {4, 5, 4}, /* cost of loading integer registers
1559 in QImode, HImode and SImode.
1560 Relative to reg-reg move (2). */
1561 {2, 3, 2}, /* cost of storing integer registers */
1562 2, /* cost of reg,reg fld/fst */
1563 {2, 2, 6}, /* cost of loading fp registers
1564 in SFmode, DFmode and XFmode */
1565 {4, 4, 6}, /* cost of storing fp registers
1566 in SFmode, DFmode and XFmode */
1567 2, /* cost of moving MMX register */
1568 {2, 2}, /* cost of loading MMX registers
1569 in SImode and DImode */
1570 {2, 2}, /* cost of storing MMX registers
1571 in SImode and DImode */
1572 12, /* cost of moving SSE register */
1573 {12, 12, 12}, /* cost of loading SSE registers
1574 in SImode, DImode and TImode */
1575 {2, 2, 8}, /* cost of storing SSE registers
1576 in SImode, DImode and TImode */
1577 10, /* MMX or SSE register to integer */
1578 8, /* size of l1 cache. */
1579 256, /* size of l2 cache. */
1580 64, /* size of prefetch block */
1581 6, /* number of parallel prefetches */
1582 2, /* Branch cost */
1583 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1584 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1585 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1586 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1587 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1588 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1589 pentium4_memcpy,
1590 pentium4_memset,
1591 1, /* scalar_stmt_cost. */
1592 1, /* scalar load_cost. */
1593 1, /* scalar_store_cost. */
1594 1, /* vec_stmt_cost. */
1595 1, /* vec_to_scalar_cost. */
1596 1, /* scalar_to_vec_cost. */
1597 1, /* vec_align_load_cost. */
1598 2, /* vec_unalign_load_cost. */
1599 1, /* vec_store_cost. */
1600 3, /* cond_taken_branch_cost. */
1601 1, /* cond_not_taken_branch_cost. */
1604 static stringop_algs nocona_memcpy[2] = {
1605 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1606 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1607 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1609 static stringop_algs nocona_memset[2] = {
1610 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1611 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1612 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1613 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1615 static const
1616 struct processor_costs nocona_cost = {
1617 COSTS_N_INSNS (1), /* cost of an add instruction */
1618 COSTS_N_INSNS (1), /* cost of a lea instruction */
1619 COSTS_N_INSNS (1), /* variable shift costs */
1620 COSTS_N_INSNS (1), /* constant shift costs */
1621 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1622 COSTS_N_INSNS (10), /* HI */
1623 COSTS_N_INSNS (10), /* SI */
1624 COSTS_N_INSNS (10), /* DI */
1625 COSTS_N_INSNS (10)}, /* other */
1626 0, /* cost of multiply per each bit set */
1627 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1628 COSTS_N_INSNS (66), /* HI */
1629 COSTS_N_INSNS (66), /* SI */
1630 COSTS_N_INSNS (66), /* DI */
1631 COSTS_N_INSNS (66)}, /* other */
1632 COSTS_N_INSNS (1), /* cost of movsx */
1633 COSTS_N_INSNS (1), /* cost of movzx */
1634 16, /* "large" insn */
1635 17, /* MOVE_RATIO */
1636 4, /* cost for loading QImode using movzbl */
1637 {4, 4, 4}, /* cost of loading integer registers
1638 in QImode, HImode and SImode.
1639 Relative to reg-reg move (2). */
1640 {4, 4, 4}, /* cost of storing integer registers */
1641 3, /* cost of reg,reg fld/fst */
1642 {12, 12, 12}, /* cost of loading fp registers
1643 in SFmode, DFmode and XFmode */
1644 {4, 4, 4}, /* cost of storing fp registers
1645 in SFmode, DFmode and XFmode */
1646 6, /* cost of moving MMX register */
1647 {12, 12}, /* cost of loading MMX registers
1648 in SImode and DImode */
1649 {12, 12}, /* cost of storing MMX registers
1650 in SImode and DImode */
1651 6, /* cost of moving SSE register */
1652 {12, 12, 12}, /* cost of loading SSE registers
1653 in SImode, DImode and TImode */
1654 {12, 12, 12}, /* cost of storing SSE registers
1655 in SImode, DImode and TImode */
1656 8, /* MMX or SSE register to integer */
1657 8, /* size of l1 cache. */
1658 1024, /* size of l2 cache. */
1659 64, /* size of prefetch block */
1660 8, /* number of parallel prefetches */
1661 1, /* Branch cost */
1662 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1663 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1664 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1665 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1666 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1667 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1668 nocona_memcpy,
1669 nocona_memset,
1670 1, /* scalar_stmt_cost. */
1671 1, /* scalar load_cost. */
1672 1, /* scalar_store_cost. */
1673 1, /* vec_stmt_cost. */
1674 1, /* vec_to_scalar_cost. */
1675 1, /* scalar_to_vec_cost. */
1676 1, /* vec_align_load_cost. */
1677 2, /* vec_unalign_load_cost. */
1678 1, /* vec_store_cost. */
1679 3, /* cond_taken_branch_cost. */
1680 1, /* cond_not_taken_branch_cost. */
1683 static stringop_algs atom_memcpy[2] = {
1684 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1685 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1686 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1687 static stringop_algs atom_memset[2] = {
1688 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1689 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1690 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1691 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1692 static const
1693 struct processor_costs atom_cost = {
1694 COSTS_N_INSNS (1), /* cost of an add instruction */
1695 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1696 COSTS_N_INSNS (1), /* variable shift costs */
1697 COSTS_N_INSNS (1), /* constant shift costs */
1698 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1699 COSTS_N_INSNS (4), /* HI */
1700 COSTS_N_INSNS (3), /* SI */
1701 COSTS_N_INSNS (4), /* DI */
1702 COSTS_N_INSNS (2)}, /* other */
1703 0, /* cost of multiply per each bit set */
1704 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1705 COSTS_N_INSNS (26), /* HI */
1706 COSTS_N_INSNS (42), /* SI */
1707 COSTS_N_INSNS (74), /* DI */
1708 COSTS_N_INSNS (74)}, /* other */
1709 COSTS_N_INSNS (1), /* cost of movsx */
1710 COSTS_N_INSNS (1), /* cost of movzx */
1711 8, /* "large" insn */
1712 17, /* MOVE_RATIO */
1713 4, /* cost for loading QImode using movzbl */
1714 {4, 4, 4}, /* cost of loading integer registers
1715 in QImode, HImode and SImode.
1716 Relative to reg-reg move (2). */
1717 {4, 4, 4}, /* cost of storing integer registers */
1718 4, /* cost of reg,reg fld/fst */
1719 {12, 12, 12}, /* cost of loading fp registers
1720 in SFmode, DFmode and XFmode */
1721 {6, 6, 8}, /* cost of storing fp registers
1722 in SFmode, DFmode and XFmode */
1723 2, /* cost of moving MMX register */
1724 {8, 8}, /* cost of loading MMX registers
1725 in SImode and DImode */
1726 {8, 8}, /* cost of storing MMX registers
1727 in SImode and DImode */
1728 2, /* cost of moving SSE register */
1729 {8, 8, 8}, /* cost of loading SSE registers
1730 in SImode, DImode and TImode */
1731 {8, 8, 8}, /* cost of storing SSE registers
1732 in SImode, DImode and TImode */
1733 5, /* MMX or SSE register to integer */
1734 32, /* size of l1 cache. */
1735 256, /* size of l2 cache. */
1736 64, /* size of prefetch block */
1737 6, /* number of parallel prefetches */
1738 3, /* Branch cost */
1739 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1740 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1741 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1742 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1743 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1744 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1745 atom_memcpy,
1746 atom_memset,
1747 1, /* scalar_stmt_cost. */
1748 1, /* scalar load_cost. */
1749 1, /* scalar_store_cost. */
1750 1, /* vec_stmt_cost. */
1751 1, /* vec_to_scalar_cost. */
1752 1, /* scalar_to_vec_cost. */
1753 1, /* vec_align_load_cost. */
1754 2, /* vec_unalign_load_cost. */
1755 1, /* vec_store_cost. */
1756 3, /* cond_taken_branch_cost. */
1757 1, /* cond_not_taken_branch_cost. */
1760 static stringop_algs slm_memcpy[2] = {
1761 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1762 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1763 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1764 static stringop_algs slm_memset[2] = {
1765 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1767 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1769 static const
1770 struct processor_costs slm_cost = {
1771 COSTS_N_INSNS (1), /* cost of an add instruction */
1772 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1773 COSTS_N_INSNS (1), /* variable shift costs */
1774 COSTS_N_INSNS (1), /* constant shift costs */
1775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1776 COSTS_N_INSNS (3), /* HI */
1777 COSTS_N_INSNS (3), /* SI */
1778 COSTS_N_INSNS (4), /* DI */
1779 COSTS_N_INSNS (2)}, /* other */
1780 0, /* cost of multiply per each bit set */
1781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1782 COSTS_N_INSNS (26), /* HI */
1783 COSTS_N_INSNS (42), /* SI */
1784 COSTS_N_INSNS (74), /* DI */
1785 COSTS_N_INSNS (74)}, /* other */
1786 COSTS_N_INSNS (1), /* cost of movsx */
1787 COSTS_N_INSNS (1), /* cost of movzx */
1788 8, /* "large" insn */
1789 17, /* MOVE_RATIO */
1790 4, /* cost for loading QImode using movzbl */
1791 {4, 4, 4}, /* cost of loading integer registers
1792 in QImode, HImode and SImode.
1793 Relative to reg-reg move (2). */
1794 {4, 4, 4}, /* cost of storing integer registers */
1795 4, /* cost of reg,reg fld/fst */
1796 {12, 12, 12}, /* cost of loading fp registers
1797 in SFmode, DFmode and XFmode */
1798 {6, 6, 8}, /* cost of storing fp registers
1799 in SFmode, DFmode and XFmode */
1800 2, /* cost of moving MMX register */
1801 {8, 8}, /* cost of loading MMX registers
1802 in SImode and DImode */
1803 {8, 8}, /* cost of storing MMX registers
1804 in SImode and DImode */
1805 2, /* cost of moving SSE register */
1806 {8, 8, 8}, /* cost of loading SSE registers
1807 in SImode, DImode and TImode */
1808 {8, 8, 8}, /* cost of storing SSE registers
1809 in SImode, DImode and TImode */
1810 5, /* MMX or SSE register to integer */
1811 32, /* size of l1 cache. */
1812 256, /* size of l2 cache. */
1813 64, /* size of prefetch block */
1814 6, /* number of parallel prefetches */
1815 3, /* Branch cost */
1816 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1817 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1818 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1819 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1820 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1821 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1822 slm_memcpy,
1823 slm_memset,
1824 1, /* scalar_stmt_cost. */
1825 1, /* scalar load_cost. */
1826 1, /* scalar_store_cost. */
1827 1, /* vec_stmt_cost. */
1828 4, /* vec_to_scalar_cost. */
1829 1, /* scalar_to_vec_cost. */
1830 1, /* vec_align_load_cost. */
1831 2, /* vec_unalign_load_cost. */
1832 1, /* vec_store_cost. */
1833 3, /* cond_taken_branch_cost. */
1834 1, /* cond_not_taken_branch_cost. */
1837 static stringop_algs intel_memcpy[2] = {
1838 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1839 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1840 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1841 static stringop_algs intel_memset[2] = {
1842 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1843 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1844 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1845 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1846 static const
1847 struct processor_costs intel_cost = {
1848 COSTS_N_INSNS (1), /* cost of an add instruction */
1849 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1850 COSTS_N_INSNS (1), /* variable shift costs */
1851 COSTS_N_INSNS (1), /* constant shift costs */
1852 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1853 COSTS_N_INSNS (3), /* HI */
1854 COSTS_N_INSNS (3), /* SI */
1855 COSTS_N_INSNS (4), /* DI */
1856 COSTS_N_INSNS (2)}, /* other */
1857 0, /* cost of multiply per each bit set */
1858 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1859 COSTS_N_INSNS (26), /* HI */
1860 COSTS_N_INSNS (42), /* SI */
1861 COSTS_N_INSNS (74), /* DI */
1862 COSTS_N_INSNS (74)}, /* other */
1863 COSTS_N_INSNS (1), /* cost of movsx */
1864 COSTS_N_INSNS (1), /* cost of movzx */
1865 8, /* "large" insn */
1866 17, /* MOVE_RATIO */
1867 4, /* cost for loading QImode using movzbl */
1868 {4, 4, 4}, /* cost of loading integer registers
1869 in QImode, HImode and SImode.
1870 Relative to reg-reg move (2). */
1871 {4, 4, 4}, /* cost of storing integer registers */
1872 4, /* cost of reg,reg fld/fst */
1873 {12, 12, 12}, /* cost of loading fp registers
1874 in SFmode, DFmode and XFmode */
1875 {6, 6, 8}, /* cost of storing fp registers
1876 in SFmode, DFmode and XFmode */
1877 2, /* cost of moving MMX register */
1878 {8, 8}, /* cost of loading MMX registers
1879 in SImode and DImode */
1880 {8, 8}, /* cost of storing MMX registers
1881 in SImode and DImode */
1882 2, /* cost of moving SSE register */
1883 {8, 8, 8}, /* cost of loading SSE registers
1884 in SImode, DImode and TImode */
1885 {8, 8, 8}, /* cost of storing SSE registers
1886 in SImode, DImode and TImode */
1887 5, /* MMX or SSE register to integer */
1888 32, /* size of l1 cache. */
1889 256, /* size of l2 cache. */
1890 64, /* size of prefetch block */
1891 6, /* number of parallel prefetches */
1892 3, /* Branch cost */
1893 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1894 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1895 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1896 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1897 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1898 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1899 intel_memcpy,
1900 intel_memset,
1901 1, /* scalar_stmt_cost. */
1902 1, /* scalar load_cost. */
1903 1, /* scalar_store_cost. */
1904 1, /* vec_stmt_cost. */
1905 4, /* vec_to_scalar_cost. */
1906 1, /* scalar_to_vec_cost. */
1907 1, /* vec_align_load_cost. */
1908 2, /* vec_unalign_load_cost. */
1909 1, /* vec_store_cost. */
1910 3, /* cond_taken_branch_cost. */
1911 1, /* cond_not_taken_branch_cost. */
1914 /* Generic should produce code tuned for Core i7 (and newer Intel chips)
1915 and btver1 (and newer AMD chips). */
1917 static stringop_algs generic_memcpy[2] = {
1918 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1919 {-1, libcall, false}}},
1920 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1921 {-1, libcall, false}}}};
1922 static stringop_algs generic_memset[2] = {
1923 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1924 {-1, libcall, false}}},
1925 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1926 {-1, libcall, false}}}};
1927 static const
1928 struct processor_costs generic_cost = {
1929 COSTS_N_INSNS (1), /* cost of an add instruction */
1930 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1931 that cost, however, our current implementation of synth_mult ends up
1932 using unnecessary temporary registers, causing regressions on several
1933 SPECfp benchmarks. */
1934 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1935 COSTS_N_INSNS (1), /* variable shift costs */
1936 COSTS_N_INSNS (1), /* constant shift costs */
1937 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1938 COSTS_N_INSNS (4), /* HI */
1939 COSTS_N_INSNS (3), /* SI */
1940 COSTS_N_INSNS (4), /* DI */
1941 COSTS_N_INSNS (2)}, /* other */
1942 0, /* cost of multiply per each bit set */
1943 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1944 COSTS_N_INSNS (26), /* HI */
1945 COSTS_N_INSNS (42), /* SI */
1946 COSTS_N_INSNS (74), /* DI */
1947 COSTS_N_INSNS (74)}, /* other */
1948 COSTS_N_INSNS (1), /* cost of movsx */
1949 COSTS_N_INSNS (1), /* cost of movzx */
1950 8, /* "large" insn */
1951 17, /* MOVE_RATIO */
1952 4, /* cost for loading QImode using movzbl */
1953 {4, 4, 4}, /* cost of loading integer registers
1954 in QImode, HImode and SImode.
1955 Relative to reg-reg move (2). */
1956 {4, 4, 4}, /* cost of storing integer registers */
1957 4, /* cost of reg,reg fld/fst */
1958 {12, 12, 12}, /* cost of loading fp registers
1959 in SFmode, DFmode and XFmode */
1960 {6, 6, 8}, /* cost of storing fp registers
1961 in SFmode, DFmode and XFmode */
1962 2, /* cost of moving MMX register */
1963 {8, 8}, /* cost of loading MMX registers
1964 in SImode and DImode */
1965 {8, 8}, /* cost of storing MMX registers
1966 in SImode and DImode */
1967 2, /* cost of moving SSE register */
1968 {8, 8, 8}, /* cost of loading SSE registers
1969 in SImode, DImode and TImode */
1970 {8, 8, 8}, /* cost of storing SSE registers
1971 in SImode, DImode and TImode */
1972 5, /* MMX or SSE register to integer */
1973 32, /* size of l1 cache. */
1974 512, /* size of l2 cache. */
1975 64, /* size of prefetch block */
1976 6, /* number of parallel prefetches */
1977 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1978 value is increased to the perhaps more appropriate value of 5. */
1979 3, /* Branch cost */
1980 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1981 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1982 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1983 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1984 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1985 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1986 generic_memcpy,
1987 generic_memset,
1988 1, /* scalar_stmt_cost. */
1989 1, /* scalar load_cost. */
1990 1, /* scalar_store_cost. */
1991 1, /* vec_stmt_cost. */
1992 1, /* vec_to_scalar_cost. */
1993 1, /* scalar_to_vec_cost. */
1994 1, /* vec_align_load_cost. */
1995 2, /* vec_unalign_load_cost. */
1996 1, /* vec_store_cost. */
1997 3, /* cond_taken_branch_cost. */
1998 1, /* cond_not_taken_branch_cost. */
2001 /* core_cost should produce code tuned for the Core family of CPUs. */
2002 static stringop_algs core_memcpy[2] = {
2003 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2004 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2005 {-1, libcall, false}}}};
2006 static stringop_algs core_memset[2] = {
2007 {libcall, {{6, loop_1_byte, true},
2008 {24, loop, true},
2009 {8192, rep_prefix_4_byte, true},
2010 {-1, libcall, false}}},
2011 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2012 {-1, libcall, false}}}};
2014 static const
2015 struct processor_costs core_cost = {
2016 COSTS_N_INSNS (1), /* cost of an add instruction */
2017 /* On all chips taken into consideration, lea is 2 cycles or more.  With
2018 that cost, however, our current implementation of synth_mult ends up
2019 using unnecessary temporary registers, causing regressions on several
2020 SPECfp benchmarks. */
2021 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2022 COSTS_N_INSNS (1), /* variable shift costs */
2023 COSTS_N_INSNS (1), /* constant shift costs */
2024 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2025 COSTS_N_INSNS (4), /* HI */
2026 COSTS_N_INSNS (3), /* SI */
2027 COSTS_N_INSNS (4), /* DI */
2028 COSTS_N_INSNS (2)}, /* other */
2029 0, /* cost of multiply per each bit set */
2030 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2031 COSTS_N_INSNS (26), /* HI */
2032 COSTS_N_INSNS (42), /* SI */
2033 COSTS_N_INSNS (74), /* DI */
2034 COSTS_N_INSNS (74)}, /* other */
2035 COSTS_N_INSNS (1), /* cost of movsx */
2036 COSTS_N_INSNS (1), /* cost of movzx */
2037 8, /* "large" insn */
2038 17, /* MOVE_RATIO */
2039 4, /* cost for loading QImode using movzbl */
2040 {4, 4, 4}, /* cost of loading integer registers
2041 in QImode, HImode and SImode.
2042 Relative to reg-reg move (2). */
2043 {4, 4, 4}, /* cost of storing integer registers */
2044 4, /* cost of reg,reg fld/fst */
2045 {12, 12, 12}, /* cost of loading fp registers
2046 in SFmode, DFmode and XFmode */
2047 {6, 6, 8}, /* cost of storing fp registers
2048 in SFmode, DFmode and XFmode */
2049 2, /* cost of moving MMX register */
2050 {8, 8}, /* cost of loading MMX registers
2051 in SImode and DImode */
2052 {8, 8}, /* cost of storing MMX registers
2053 in SImode and DImode */
2054 2, /* cost of moving SSE register */
2055 {8, 8, 8}, /* cost of loading SSE registers
2056 in SImode, DImode and TImode */
2057 {8, 8, 8}, /* cost of storing SSE registers
2058 in SImode, DImode and TImode */
2059 5, /* MMX or SSE register to integer */
2060 64, /* size of l1 cache. */
2061 512, /* size of l2 cache. */
2062 64, /* size of prefetch block */
2063 6, /* number of parallel prefetches */
2064 /* FIXME: perhaps a more appropriate value is 5. */
2065 3, /* Branch cost */
2066 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2067 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2068 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2069 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2070 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2071 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2072 core_memcpy,
2073 core_memset,
2074 1, /* scalar_stmt_cost. */
2075 1, /* scalar load_cost. */
2076 1, /* scalar_store_cost. */
2077 1, /* vec_stmt_cost. */
2078 1, /* vec_to_scalar_cost. */
2079 1, /* scalar_to_vec_cost. */
2080 1, /* vec_align_load_cost. */
2081 2, /* vec_unalign_load_cost. */
2082 1, /* vec_store_cost. */
2083 3, /* cond_taken_branch_cost. */
2084 1, /* cond_not_taken_branch_cost. */
2088 /* Set by -mtune. */
2089 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2091 /* Set by -mtune or -Os. */
2092 const struct processor_costs *ix86_cost = &pentium_cost;
2094 /* Processor feature/optimization bitmasks. */
2095 #define m_386 (1<<PROCESSOR_I386)
2096 #define m_486 (1<<PROCESSOR_I486)
2097 #define m_PENT (1<<PROCESSOR_PENTIUM)
2098 #define m_IAMCU (1<<PROCESSOR_IAMCU)
2099 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2100 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2101 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2102 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2103 #define m_CORE2 (1<<PROCESSOR_CORE2)
2104 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2105 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2106 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2107 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2108 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2109 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2110 #define m_KNL (1<<PROCESSOR_KNL)
2111 #define m_INTEL (1<<PROCESSOR_INTEL)
2113 #define m_GEODE (1<<PROCESSOR_GEODE)
2114 #define m_K6 (1<<PROCESSOR_K6)
2115 #define m_K6_GEODE (m_K6 | m_GEODE)
2116 #define m_K8 (1<<PROCESSOR_K8)
2117 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2118 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2119 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2120 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2121 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2122 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2123 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2124 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2125 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2126 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2127 #define m_BTVER (m_BTVER1 | m_BTVER2)
2128 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2130 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2132 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2133 #undef DEF_TUNE
2134 #define DEF_TUNE(tune, name, selector) name,
2135 #include "x86-tune.def"
2136 #undef DEF_TUNE
2139 /* Feature tests against the various tunings. */
2140 unsigned char ix86_tune_features[X86_TUNE_LAST];
2142 /* Feature tests against the various tunings used to create ix86_tune_features
2143 based on the processor mask. */
2144 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2145 #undef DEF_TUNE
2146 #define DEF_TUNE(tune, name, selector) selector,
2147 #include "x86-tune.def"
2148 #undef DEF_TUNE
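/* Illustration (a sketch with a made-up entry, not one copied from
   x86-tune.def): an entry of the form

     DEF_TUNE (X86_TUNE_FOO, "foo", m_CORE_ALL | m_GENERIC)

   contributes the string "foo" to ix86_tune_feature_names[] and the mask
   (m_CORE_ALL | m_GENERIC) to initial_ix86_tune_features[].  The selected
   -mtune processor's bit is later tested against that mask to fill in
   ix86_tune_features[X86_TUNE_FOO].  */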
2151 /* Feature tests against the various architecture variations. */
2152 unsigned char ix86_arch_features[X86_ARCH_LAST];
2154 /* Feature tests against the various architecture variations, used to create
2155 ix86_arch_features based on the processor mask. */
2156 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2157 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2158 ~(m_386 | m_486 | m_PENT | m_IAMCU | m_K6),
2160 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2161 ~m_386,
2163 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2164 ~(m_386 | m_486),
2166 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2167 ~m_386,
2169 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2170 ~m_386,
2173 /* If the average insn count for a single function invocation is
2174 lower than this constant, emit fast (but longer) prologue and
2175 epilogue code. */
2176 #define FAST_PROLOGUE_INSN_COUNT 20
2178 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2179 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2180 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2181 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2183 /* Array of the smallest class containing reg number REGNO, indexed by
2184 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2186 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2188 /* ax, dx, cx, bx */
2189 AREG, DREG, CREG, BREG,
2190 /* si, di, bp, sp */
2191 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2192 /* FP registers */
2193 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2194 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2195 /* arg pointer */
2196 NON_Q_REGS,
2197 /* flags, fpsr, fpcr, frame */
2198 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2199 /* SSE registers */
2200 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2201 SSE_REGS, SSE_REGS,
2202 /* MMX registers */
2203 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2204 MMX_REGS, MMX_REGS,
2205 /* REX registers */
2206 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2207 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2208 /* SSE REX registers */
2209 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2210 SSE_REGS, SSE_REGS,
2211 /* AVX-512 SSE registers */
2212 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2213 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2214 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2215 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2216 /* Mask registers. */
2217 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2218 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2219 /* MPX bound registers */
2220 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2223 /* The "default" register map used in 32bit mode. */
2225 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2227 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2228 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2229 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2230 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2231 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2232 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2233 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2234 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2235 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2236 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2237 101, 102, 103, 104, /* bound registers */
2240 /* The "default" register map used in 64bit mode. */
2242 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2244 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2245 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2246 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2247 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2248 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2249 8,9,10,11,12,13,14,15, /* extended integer registers */
2250 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2251 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2252 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2253 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2254 126, 127, 128, 129, /* bound registers */
2257 /* Define the register numbers to be used in Dwarf debugging information.
2258 The SVR4 reference port C compiler uses the following register numbers
2259 in its Dwarf output code:
2260 0 for %eax (gcc regno = 0)
2261 1 for %ecx (gcc regno = 2)
2262 2 for %edx (gcc regno = 1)
2263 3 for %ebx (gcc regno = 3)
2264 4 for %esp (gcc regno = 7)
2265 5 for %ebp (gcc regno = 6)
2266 6 for %esi (gcc regno = 4)
2267 7 for %edi (gcc regno = 5)
2268 The following three DWARF register numbers are never generated by
2269 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2270 believes these numbers have these meanings.
2271 8 for %eip (no gcc equivalent)
2272 9 for %eflags (gcc regno = 17)
2273 10 for %trapno (no gcc equivalent)
2274 It is not at all clear how we should number the FP stack registers
2275 for the x86 architecture. If the version of SDB on x86/svr4 were
2276 a bit less brain dead with respect to floating-point then we would
2277 have a precedent to follow with respect to DWARF register numbers
2278 for x86 FP registers, but the SDB on x86/svr4 is so completely
2279 broken with respect to FP registers that it is hardly worth thinking
2280 of it as something to strive for compatibility with.
2281 The version of x86/svr4 SDB I have at the moment does (partially)
2282 seem to believe that DWARF register number 11 is associated with
2283 the x86 register %st(0), but that's about all. Higher DWARF
2284 register numbers don't seem to be associated with anything in
2285 particular, and even for DWARF regno 11, SDB only seems to under-
2286 stand that it should say that a variable lives in %st(0) (when
2287 asked via an `=' command) if we said it was in DWARF regno 11,
2288 but SDB still prints garbage when asked for the value of the
2289 variable in question (via a `/' command).
2290 (Also note that the labels SDB prints for various FP stack regs
2291 when doing an `x' command are all wrong.)
2292 Note that these problems generally don't affect the native SVR4
2293 C compiler because it doesn't allow the use of -O with -g and
2294 because when it is *not* optimizing, it allocates a memory
2295 location for each floating-point variable, and the memory
2296 location is what gets described in the DWARF AT_location
2297 attribute for the variable in question.
2298 Regardless of the severe mental illness of the x86/svr4 SDB, we
2299 do something sensible here and we use the following DWARF
2300 register numbers. Note that these are all stack-top-relative
2301 numbers.
2302 11 for %st(0) (gcc regno = 8)
2303 12 for %st(1) (gcc regno = 9)
2304 13 for %st(2) (gcc regno = 10)
2305 14 for %st(3) (gcc regno = 11)
2306 15 for %st(4) (gcc regno = 12)
2307 16 for %st(5) (gcc regno = 13)
2308 17 for %st(6) (gcc regno = 14)
2309 18 for %st(7) (gcc regno = 15)
2311 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2313 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2314 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2315 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2316 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2317 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2318 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2319 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2320 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2321 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2322 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2323 101, 102, 103, 104, /* bound registers */
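/* A concrete reading of the table above: svr4_dbx_register_map[2] == 1,
   because gcc register number 2 is %ecx and the SVR4 numbering listed in
   the comment assigns %ecx DWARF number 1.  Likewise entry 8, the first FP
   stack register %st(0), maps to 11, matching the stack-top-relative
   numbers chosen above.  */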
2326 /* Define parameter passing and return registers. */
2328 static int const x86_64_int_parameter_registers[6] =
2330 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2333 static int const x86_64_ms_abi_int_parameter_registers[4] =
2335 CX_REG, DX_REG, R8_REG, R9_REG
2338 static int const x86_64_int_return_registers[4] =
2340 AX_REG, DX_REG, DI_REG, SI_REG
2343 /* Additional registers that are clobbered by SYSV calls. */
2345 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2347 SI_REG, DI_REG,
2348 XMM6_REG, XMM7_REG,
2349 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2350 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2353 /* Define the structure for the machine field in struct function. */
2355 struct GTY(()) stack_local_entry {
2356 unsigned short mode;
2357 unsigned short n;
2358 rtx rtl;
2359 struct stack_local_entry *next;
2362 /* Structure describing stack frame layout.
2363 Stack grows downward:
2365 [arguments]
2366 <- ARG_POINTER
2367 saved pc
2369 saved static chain if ix86_static_chain_on_stack
2371 saved frame pointer if frame_pointer_needed
2372 <- HARD_FRAME_POINTER
2373 [saved regs]
2374 <- regs_save_offset
2375 [padding0]
2377 [saved SSE regs]
2378 <- sse_regs_save_offset
2379 [padding1] |
2380 | <- FRAME_POINTER
2381 [va_arg registers] |
2383 [frame] |
2385 [padding2] | = to_allocate
2386 <- STACK_POINTER
2388 struct ix86_frame
2390 int nsseregs;
2391 int nregs;
2392 int va_arg_size;
2393 int red_zone_size;
2394 int outgoing_arguments_size;
2396 /* The offsets relative to ARG_POINTER. */
2397 HOST_WIDE_INT frame_pointer_offset;
2398 HOST_WIDE_INT hard_frame_pointer_offset;
2399 HOST_WIDE_INT stack_pointer_offset;
2400 HOST_WIDE_INT hfp_save_offset;
2401 HOST_WIDE_INT reg_save_offset;
2402 HOST_WIDE_INT sse_reg_save_offset;
2404 /* When save_regs_using_mov is set, emit prologue using
2405 move instead of push instructions. */
2406 bool save_regs_using_mov;
2409 /* Which cpu are we scheduling for. */
2410 enum attr_cpu ix86_schedule;
2412 /* Which cpu are we optimizing for. */
2413 enum processor_type ix86_tune;
2415 /* Which instruction set architecture to use. */
2416 enum processor_type ix86_arch;
2418 /* True if processor has SSE prefetch instruction. */
2419 unsigned char x86_prefetch_sse;
2421 /* -mstackrealign option */
2422 static const char ix86_force_align_arg_pointer_string[]
2423 = "force_align_arg_pointer";
2425 static rtx (*ix86_gen_leave) (void);
2426 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2427 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2428 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2429 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2430 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2431 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2432 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2433 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2434 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2435 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2436 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2437 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2439 /* Preferred alignment for stack boundary in bits. */
2440 unsigned int ix86_preferred_stack_boundary;
2442 /* Alignment for incoming stack boundary in bits specified at
2443 command line. */
2444 static unsigned int ix86_user_incoming_stack_boundary;
2446 /* Default alignment for incoming stack boundary in bits. */
2447 static unsigned int ix86_default_incoming_stack_boundary;
2449 /* Alignment for incoming stack boundary in bits. */
2450 unsigned int ix86_incoming_stack_boundary;
2452 /* Calling abi specific va_list type nodes. */
2453 static GTY(()) tree sysv_va_list_type_node;
2454 static GTY(()) tree ms_va_list_type_node;
2456 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2457 char internal_label_prefix[16];
2458 int internal_label_prefix_len;
2460 /* Fence to use after loop using movnt. */
2461 tree x86_mfence;
2463 /* Register class used for passing a given 64-bit part of the argument.
2464 These represent classes as documented by the psABI, with the exception
2465 of the SSESF and SSEDF classes, which are basically the SSE class, but
2466 gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
2468 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2469 whenever possible (the upper half is padding). */
2470 enum x86_64_reg_class
2472 X86_64_NO_CLASS,
2473 X86_64_INTEGER_CLASS,
2474 X86_64_INTEGERSI_CLASS,
2475 X86_64_SSE_CLASS,
2476 X86_64_SSESF_CLASS,
2477 X86_64_SSEDF_CLASS,
2478 X86_64_SSEUP_CLASS,
2479 X86_64_X87_CLASS,
2480 X86_64_X87UP_CLASS,
2481 X86_64_COMPLEX_X87_CLASS,
2482 X86_64_MEMORY_CLASS
2485 #define MAX_CLASSES 8
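/* A hedged illustration of how the classes are used (a sketch of the usual
   psABI classification, not a quote from the ABI document): an argument of
   type

     struct { long l; double d; }

   occupies two eightbytes; the first is classified X86_64_INTEGER_CLASS and
   is passed in a general register, while the second ends up in the SSE
   family of classes (SSEDF when it is known to be a lone double, so a
   DFmode move can be used) and is passed in an SSE register.  */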
2487 /* Table of constants used by fldpi, fldln2, etc.... */
2488 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2489 static bool ext_80387_constants_init = 0;
2492 static struct machine_function * ix86_init_machine_status (void);
2493 static rtx ix86_function_value (const_tree, const_tree, bool);
2494 static bool ix86_function_value_regno_p (const unsigned int);
2495 static unsigned int ix86_function_arg_boundary (machine_mode,
2496 const_tree);
2497 static rtx ix86_static_chain (const_tree, bool);
2498 static int ix86_function_regparm (const_tree, const_tree);
2499 static void ix86_compute_frame_layout (struct ix86_frame *);
2500 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2501 rtx, rtx, int);
2502 static void ix86_add_new_builtins (HOST_WIDE_INT);
2503 static tree ix86_canonical_va_list_type (tree);
2504 static void predict_jump (int);
2505 static unsigned int split_stack_prologue_scratch_regno (void);
2506 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2508 enum ix86_function_specific_strings
2510 IX86_FUNCTION_SPECIFIC_ARCH,
2511 IX86_FUNCTION_SPECIFIC_TUNE,
2512 IX86_FUNCTION_SPECIFIC_MAX
2515 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2516 const char *, enum fpmath_unit, bool);
2517 static void ix86_function_specific_save (struct cl_target_option *,
2518 struct gcc_options *opts);
2519 static void ix86_function_specific_restore (struct gcc_options *opts,
2520 struct cl_target_option *);
2521 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2522 static void ix86_function_specific_print (FILE *, int,
2523 struct cl_target_option *);
2524 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2525 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2526 struct gcc_options *,
2527 struct gcc_options *,
2528 struct gcc_options *);
2529 static bool ix86_can_inline_p (tree, tree);
2530 static void ix86_set_current_function (tree);
2531 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2533 static enum calling_abi ix86_function_abi (const_tree);
2536 #ifndef SUBTARGET32_DEFAULT_CPU
2537 #define SUBTARGET32_DEFAULT_CPU "i386"
2538 #endif
2540 /* Whether -mtune= or -march= were specified */
2541 static int ix86_tune_defaulted;
2542 static int ix86_arch_specified;
2544 /* Vectorization library interface and handlers. */
2545 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2547 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2548 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2550 /* Processor target table, indexed by processor number */
2551 struct ptt
2553 const char *const name; /* processor name */
2554 const struct processor_costs *cost; /* Processor costs */
2555 const int align_loop; /* Default alignments. */
2556 const int align_loop_max_skip;
2557 const int align_jump;
2558 const int align_jump_max_skip;
2559 const int align_func;
2562 /* This table must be in sync with enum processor_type in i386.h. */
2563 static const struct ptt processor_target_table[PROCESSOR_max] =
2565 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2566 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2567 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2568 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2569 {"iamcu", &iamcu_cost, 16, 7, 16, 7, 16},
2570 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2571 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2572 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2573 {"core2", &core_cost, 16, 10, 16, 10, 16},
2574 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2575 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2576 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2577 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2578 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2579 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2580 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2581 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2582 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2583 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2584 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2585 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2586 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2587 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2588 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2589 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2590 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2591 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2594 static unsigned int
2595 rest_of_handle_insert_vzeroupper (void)
2597 int i;
2599 /* vzeroupper instructions are inserted immediately after reload to
2600 account for possible spills from 256-bit registers.  The pass
2601 reuses the mode switching infrastructure by re-running the mode
2602 insertion pass, so disable entities that have already been processed. */
2603 for (i = 0; i < MAX_386_ENTITIES; i++)
2604 ix86_optimize_mode_switching[i] = 0;
2606 ix86_optimize_mode_switching[AVX_U128] = 1;
2608 /* Call optimize_mode_switching. */
2609 g->get_passes ()->execute_pass_mode_switching ();
2610 return 0;
2613 namespace {
2615 const pass_data pass_data_insert_vzeroupper =
2617 RTL_PASS, /* type */
2618 "vzeroupper", /* name */
2619 OPTGROUP_NONE, /* optinfo_flags */
2620 TV_NONE, /* tv_id */
2621 0, /* properties_required */
2622 0, /* properties_provided */
2623 0, /* properties_destroyed */
2624 0, /* todo_flags_start */
2625 TODO_df_finish, /* todo_flags_finish */
2628 class pass_insert_vzeroupper : public rtl_opt_pass
2630 public:
2631 pass_insert_vzeroupper(gcc::context *ctxt)
2632 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2635 /* opt_pass methods: */
2636 virtual bool gate (function *)
2638 return TARGET_AVX && !TARGET_AVX512F
2639 && TARGET_VZEROUPPER && flag_expensive_optimizations
2640 && !optimize_size;
2643 virtual unsigned int execute (function *)
2645 return rest_of_handle_insert_vzeroupper ();
2648 }; // class pass_insert_vzeroupper
2650 } // anon namespace
2652 rtl_opt_pass *
2653 make_pass_insert_vzeroupper (gcc::context *ctxt)
2655 return new pass_insert_vzeroupper (ctxt);
2658 /* Return true if a red-zone is in use. */
2660 static inline bool
2661 ix86_using_red_zone (void)
2663 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2666 /* Return a string that documents the current -m options. The caller is
2667 responsible for freeing the string. */
2669 static char *
2670 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2671 const char *tune, enum fpmath_unit fpmath,
2672 bool add_nl_p)
2674 struct ix86_target_opts
2676 const char *option; /* option string */
2677 HOST_WIDE_INT mask; /* isa mask options */
2680 /* This table is ordered so that options like -msse4.2, which imply
2681 other ISA options, are listed before the options they imply. */
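/* For illustration (this only restates what the matching loop below does):
   when OPTION_MASK_ISA_SSE4_2 and the bits it implies are set in ISA, the
   loop emits "-msse4.2" first and then each implied option in table order,
   e.g. "-msse4.1 -mssse3 -msse3 -msse2 -msse", since every entry tests and
   clears only its own mask bit.  */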
2682 static struct ix86_target_opts isa_opts[] =
2684 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2685 { "-mfma", OPTION_MASK_ISA_FMA },
2686 { "-mxop", OPTION_MASK_ISA_XOP },
2687 { "-mlwp", OPTION_MASK_ISA_LWP },
2688 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2689 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2690 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2691 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2692 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2693 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2694 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2695 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2696 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2697 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2698 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2699 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2700 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2701 { "-msse3", OPTION_MASK_ISA_SSE3 },
2702 { "-msse2", OPTION_MASK_ISA_SSE2 },
2703 { "-msse", OPTION_MASK_ISA_SSE },
2704 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2705 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2706 { "-mmmx", OPTION_MASK_ISA_MMX },
2707 { "-mabm", OPTION_MASK_ISA_ABM },
2708 { "-mbmi", OPTION_MASK_ISA_BMI },
2709 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2710 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2711 { "-mhle", OPTION_MASK_ISA_HLE },
2712 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2713 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2714 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2715 { "-madx", OPTION_MASK_ISA_ADX },
2716 { "-mtbm", OPTION_MASK_ISA_TBM },
2717 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2718 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2719 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2720 { "-maes", OPTION_MASK_ISA_AES },
2721 { "-msha", OPTION_MASK_ISA_SHA },
2722 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2723 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2724 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2725 { "-mf16c", OPTION_MASK_ISA_F16C },
2726 { "-mrtm", OPTION_MASK_ISA_RTM },
2727 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2728 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2729 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2730 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2731 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2732 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2733 { "-mmpx", OPTION_MASK_ISA_MPX },
2734 { "-mclwb", OPTION_MASK_ISA_CLWB },
2735 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2736 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2739 /* Flag options. */
2740 static struct ix86_target_opts flag_opts[] =
2742 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2743 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2744 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2745 { "-m80387", MASK_80387 },
2746 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2747 { "-malign-double", MASK_ALIGN_DOUBLE },
2748 { "-mcld", MASK_CLD },
2749 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2750 { "-mieee-fp", MASK_IEEE_FP },
2751 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2752 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2753 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2754 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2755 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2756 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2757 { "-mno-red-zone", MASK_NO_RED_ZONE },
2758 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2759 { "-mrecip", MASK_RECIP },
2760 { "-mrtd", MASK_RTD },
2761 { "-msseregparm", MASK_SSEREGPARM },
2762 { "-mstack-arg-probe", MASK_STACK_PROBE },
2763 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2764 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2765 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2766 { "-mvzeroupper", MASK_VZEROUPPER },
2767 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2768 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2769 { "-mprefer-avx128", MASK_PREFER_AVX128},
2772 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2774 char isa_other[40];
2775 char target_other[40];
2776 unsigned num = 0;
2777 unsigned i, j;
2778 char *ret;
2779 char *ptr;
2780 size_t len;
2781 size_t line_len;
2782 size_t sep_len;
2783 const char *abi;
2785 memset (opts, '\0', sizeof (opts));
2787 /* Add -march= option. */
2788 if (arch)
2790 opts[num][0] = "-march=";
2791 opts[num++][1] = arch;
2794 /* Add -mtune= option. */
2795 if (tune)
2797 opts[num][0] = "-mtune=";
2798 opts[num++][1] = tune;
2801 /* Add -m32/-m64/-mx32. */
2802 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2804 if ((isa & OPTION_MASK_ABI_64) != 0)
2805 abi = "-m64";
2806 else
2807 abi = "-mx32";
2808 isa &= ~ (OPTION_MASK_ISA_64BIT
2809 | OPTION_MASK_ABI_64
2810 | OPTION_MASK_ABI_X32);
2812 else
2813 abi = "-m32";
2814 opts[num++][0] = abi;
2816 /* Pick out the options in isa options. */
2817 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2819 if ((isa & isa_opts[i].mask) != 0)
2821 opts[num++][0] = isa_opts[i].option;
2822 isa &= ~ isa_opts[i].mask;
2826 if (isa && add_nl_p)
2828 opts[num++][0] = isa_other;
2829 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2830 isa);
2833 /* Add flag options. */
2834 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2836 if ((flags & flag_opts[i].mask) != 0)
2838 opts[num++][0] = flag_opts[i].option;
2839 flags &= ~ flag_opts[i].mask;
2843 if (flags && add_nl_p)
2845 opts[num++][0] = target_other;
2846 sprintf (target_other, "(other flags: %#x)", flags);
2849 /* Add -fpmath= option. */
2850 if (fpmath)
2852 opts[num][0] = "-mfpmath=";
2853 switch ((int) fpmath)
2855 case FPMATH_387:
2856 opts[num++][1] = "387";
2857 break;
2859 case FPMATH_SSE:
2860 opts[num++][1] = "sse";
2861 break;
2863 case FPMATH_387 | FPMATH_SSE:
2864 opts[num++][1] = "sse+387";
2865 break;
2867 default:
2868 gcc_unreachable ();
2872 /* Any options? */
2873 if (num == 0)
2874 return NULL;
2876 gcc_assert (num < ARRAY_SIZE (opts));
2878 /* Size the string. */
2879 len = 0;
2880 sep_len = (add_nl_p) ? 3 : 1;
2881 for (i = 0; i < num; i++)
2883 len += sep_len;
2884 for (j = 0; j < 2; j++)
2885 if (opts[i][j])
2886 len += strlen (opts[i][j]);
2889 /* Build the string. */
2890 ret = ptr = (char *) xmalloc (len);
2891 line_len = 0;
2893 for (i = 0; i < num; i++)
2895 size_t len2[2];
2897 for (j = 0; j < 2; j++)
2898 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2900 if (i != 0)
2902 *ptr++ = ' ';
2903 line_len++;
2905 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2907 *ptr++ = '\\';
2908 *ptr++ = '\n';
2909 line_len = 0;
2913 for (j = 0; j < 2; j++)
2914 if (opts[i][j])
2916 memcpy (ptr, opts[i][j], len2[j]);
2917 ptr += len2[j];
2918 line_len += len2[j];
2922 *ptr = '\0';
2923 gcc_assert (ret + len >= ptr);
2925 return ret;
2928 /* Return true if profiling code should be emitted before the
2929 prologue; otherwise return false.
2930 Note: For x86 the "hotfix" combination is diagnosed with sorry (). */
2931 static bool
2932 ix86_profile_before_prologue (void)
2934 return flag_fentry != 0;
2937 /* Function that is callable from the debugger to print the current
2938 options. */
2939 void ATTRIBUTE_UNUSED
2940 ix86_debug_options (void)
2942 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2943 ix86_arch_string, ix86_tune_string,
2944 ix86_fpmath, true);
2946 if (opts)
2948 fprintf (stderr, "%s\n\n", opts);
2949 free (opts);
2951 else
2952 fputs ("<no options>\n\n", stderr);
2954 return;
2957 static const char *stringop_alg_names[] = {
2958 #define DEF_ENUM
2959 #define DEF_ALG(alg, name) #name,
2960 #include "stringop.def"
2961 #undef DEF_ENUM
2962 #undef DEF_ALG
2965 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2966 The string is of the following form (or a comma-separated list of such entries):
2968 strategy_alg:max_size:[align|noalign]
2970 where the full size range for the strategy is either [0, max_size] or
2971 [min_size, max_size], in which min_size is the max_size + 1 of the
2972 preceding range. The last size range must have max_size == -1.
2974 Examples:
2977 -mmemcpy-strategy=libcall:-1:noalign
2979 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2983 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2985 This is to tell the compiler to use the following strategy for memset
2986 1) when the expected size is between [1, 16], use rep_8byte strategy;
2987 2) when the size is between [17, 2048], use vector_loop;
2988 3) when the size is > 2048, use libcall. */
2990 struct stringop_size_range
2992 int max;
2993 stringop_alg alg;
2994 bool noalign;
2997 static void
2998 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
3000 const struct stringop_algs *default_algs;
3001 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
3002 char *curr_range_str, *next_range_str;
3003 int i = 0, n = 0;
3005 if (is_memset)
3006 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
3007 else
3008 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3010 curr_range_str = strategy_str;
3014 int maxs;
3015 char alg_name[128];
3016 char align[16];
3017 next_range_str = strchr (curr_range_str, ',');
3018 if (next_range_str)
3019 *next_range_str++ = '\0';
3021 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
3022 alg_name, &maxs, align))
3024 error ("wrong arg %s to option %s", curr_range_str,
3025 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3026 return;
3029 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3031 error ("size ranges of option %s should be increasing",
3032 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3033 return;
3036 for (i = 0; i < last_alg; i++)
3037 if (!strcmp (alg_name, stringop_alg_names[i]))
3038 break;
3040 if (i == last_alg)
3042 error ("wrong stringop strategy name %s specified for option %s",
3043 alg_name,
3044 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3045 return;
3048 if ((stringop_alg) i == rep_prefix_8_byte
3049 && !TARGET_64BIT)
3051 /* rep; movq isn't available in 32-bit code. */
3052 error ("stringop strategy name %s specified for option %s "
3053 "not supported for 32-bit code",
3054 alg_name,
3055 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3056 return;
3059 input_ranges[n].max = maxs;
3060 input_ranges[n].alg = (stringop_alg) i;
3061 if (!strcmp (align, "align"))
3062 input_ranges[n].noalign = false;
3063 else if (!strcmp (align, "noalign"))
3064 input_ranges[n].noalign = true;
3065 else
3067 error ("unknown alignment %s specified for option %s",
3068 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3069 return;
3071 n++;
3072 curr_range_str = next_range_str;
3074 while (curr_range_str);
3076 if (input_ranges[n - 1].max != -1)
3078 error ("the max value for the last size range should be -1"
3079 " for option %s",
3080 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3081 return;
3084 if (n > MAX_STRINGOP_ALGS)
3086 error ("too many size ranges specified in option %s",
3087 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3088 return;
3091 /* Now override the default algs array. */
3092 for (i = 0; i < n; i++)
3094 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3095 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3096 = input_ranges[i].alg;
3097 *const_cast<int *>(&default_algs->size[i].noalign)
3098 = input_ranges[i].noalign;
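/* As a sketch of the result, the -mmemset-strategy example from the
   comment above ("rep_8byte:16:noalign,vector_loop:2048:align,
   libcall:-1:noalign") would fill input_ranges[] with three entries,
   { 16, <rep_8byte alg>, noalign=true }, { 2048, <vector_loop alg>,
   noalign=false } and { -1, <libcall alg>, noalign=true }, which are then
   copied over ix86_cost->memset[TARGET_64BIT != 0].  The enum values come
   from stringop.def, so the names here are only indicative.  */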
3103 /* Parse the -mtune-ctrl= option. When DUMP is true,
3104 print the features that are explicitly set. */
3106 static void
3107 parse_mtune_ctrl_str (bool dump)
3109 if (!ix86_tune_ctrl_string)
3110 return;
3112 char *next_feature_string = NULL;
3113 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3114 char *orig = curr_feature_string;
3115 int i;
3118 bool clear = false;
3120 next_feature_string = strchr (curr_feature_string, ',');
3121 if (next_feature_string)
3122 *next_feature_string++ = '\0';
3123 if (*curr_feature_string == '^')
3125 curr_feature_string++;
3126 clear = true;
3128 for (i = 0; i < X86_TUNE_LAST; i++)
3130 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3132 ix86_tune_features[i] = !clear;
3133 if (dump)
3134 fprintf (stderr, "Explicitly %s feature %s\n",
3135 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3136 break;
3139 if (i == X86_TUNE_LAST)
3140 error ("Unknown parameter to option -mtune-ctrl: %s",
3141 clear ? curr_feature_string - 1 : curr_feature_string);
3142 curr_feature_string = next_feature_string;
3144 while (curr_feature_string);
3145 free (orig);
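/* Usage sketch (the feature names are whatever ix86_tune_feature_names[]
   provides; the two shown here are only examples):

       -mtune-ctrl=feature_a,^feature_b

   sets feature_a and clears feature_b: entries are comma separated and a
   leading '^' requests clearing instead of setting.  */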
3148 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3149 processor type. */
3151 static void
3152 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3154 unsigned int ix86_tune_mask = 1u << ix86_tune;
3155 int i;
3157 for (i = 0; i < X86_TUNE_LAST; ++i)
3159 if (ix86_tune_no_default)
3160 ix86_tune_features[i] = 0;
3161 else
3162 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3165 if (dump)
3167 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3168 for (i = 0; i < X86_TUNE_LAST; i++)
3169 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3170 ix86_tune_features[i] ? "on" : "off");
3173 parse_mtune_ctrl_str (dump);
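/* In other words, each initial_ix86_tune_features[] entry is a bitmask
   indexed by processor, so for a given feature F the selection above is
   roughly

       ix86_tune_features[F]
         = (initial_ix86_tune_features[F] >> ix86_tune) & 1;

   unless ix86_tune_no_default forces the whole array to zero;
   -mtune-ctrl can then adjust individual entries.  */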
3177 /* Default align_* from the processor table. */
3179 static void
3180 ix86_default_align (struct gcc_options *opts)
3182 if (opts->x_align_loops == 0)
3184 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3185 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3187 if (opts->x_align_jumps == 0)
3189 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3190 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3192 if (opts->x_align_functions == 0)
3194 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3198 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3200 static void
3201 ix86_override_options_after_change (void)
3203 ix86_default_align (&global_options);
3206 /* Override various settings based on options. If MAIN_ARGS_P, the
3207 options are from the command line, otherwise they are from
3208 attributes. */
3210 static void
3211 ix86_option_override_internal (bool main_args_p,
3212 struct gcc_options *opts,
3213 struct gcc_options *opts_set)
3215 int i;
3216 unsigned int ix86_arch_mask;
3217 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3218 const char *prefix;
3219 const char *suffix;
3220 const char *sw;
3222 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3223 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3224 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3225 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3226 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3227 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3228 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3229 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3230 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3231 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3232 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3233 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3234 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3235 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3236 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3237 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3238 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3239 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3240 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3241 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3242 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3243 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3244 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3245 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3246 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3247 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3248 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3249 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3250 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3251 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3252 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3253 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3254 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3255 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3256 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3257 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3258 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3259 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3260 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3261 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3262 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3263 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3264 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3265 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3266 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3267 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3268 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3269 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3270 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3271 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3272 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3273 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3274 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3275 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3276 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3277 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3278 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3279 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
3281 #define PTA_CORE2 \
3282 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3283 | PTA_CX16 | PTA_FXSR)
3284 #define PTA_NEHALEM \
3285 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3286 #define PTA_WESTMERE \
3287 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3288 #define PTA_SANDYBRIDGE \
3289 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3290 #define PTA_IVYBRIDGE \
3291 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3292 #define PTA_HASWELL \
3293 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3294 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3295 #define PTA_BROADWELL \
3296 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3297 #define PTA_KNL \
3298 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3299 #define PTA_BONNELL \
3300 (PTA_CORE2 | PTA_MOVBE)
3301 #define PTA_SILVERMONT \
3302 (PTA_WESTMERE | PTA_MOVBE)
3304 /* If this reaches 64, we need to widen the flags field in struct pta below. */
3306 static struct pta
3308 const char *const name; /* processor name or nickname. */
3309 const enum processor_type processor;
3310 const enum attr_cpu schedule;
3311 const unsigned HOST_WIDE_INT flags;
3313 const processor_alias_table[] =
3315 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3316 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3317 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3318 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3319 {"iamcu", PROCESSOR_IAMCU, CPU_PENTIUM, 0},
3320 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3321 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3322 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3323 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3324 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3325 PTA_MMX | PTA_SSE | PTA_FXSR},
3326 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3327 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3328 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3329 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3330 PTA_MMX | PTA_SSE | PTA_FXSR},
3331 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3332 PTA_MMX | PTA_SSE | PTA_FXSR},
3333 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3334 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3335 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3336 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3337 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3338 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3339 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3340 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3341 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3342 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3343 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3344 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3345 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3346 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3347 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3348 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3349 PTA_SANDYBRIDGE},
3350 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3351 PTA_SANDYBRIDGE},
3352 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3353 PTA_IVYBRIDGE},
3354 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3355 PTA_IVYBRIDGE},
3356 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3357 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3358 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3359 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3360 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3361 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3362 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3363 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3364 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3365 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3366 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3367 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3368 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3369 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3370 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3371 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3372 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3373 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3374 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3375 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3376 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3377 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3378 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3379 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3380 {"x86-64", PROCESSOR_K8, CPU_K8,
3381 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3382 {"k8", PROCESSOR_K8, CPU_K8,
3383 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3384 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3385 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3386 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3387 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3388 {"opteron", PROCESSOR_K8, CPU_K8,
3389 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3390 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3391 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3392 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3393 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3394 {"athlon64", PROCESSOR_K8, CPU_K8,
3395 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3396 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3397 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3398 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3399 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3400 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3401 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3402 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3403 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3404 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3405 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3406 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3407 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3408 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3409 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3410 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3411 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3412 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3413 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3414 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3415 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3416 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3417 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3418 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3419 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3420 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3421 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3422 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3423 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3424 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3425 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3426 | PTA_XSAVEOPT | PTA_FSGSBASE},
3427 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3428 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3429 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3430 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3431 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3432 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3433 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3434 | PTA_MOVBE | PTA_MWAITX},
3435 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3436 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3437 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3438 | PTA_FXSR | PTA_XSAVE},
3439 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3440 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3441 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3442 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3443 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3444 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3446 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3447 PTA_64BIT
3448 | PTA_HLE /* flags are only used for -march switch. */ },
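/* For instance, -march=nehalem resolves through this table to
   PROCESSOR_NEHALEM / CPU_NEHALEM with PTA_NEHALEM, i.e. the PTA_CORE2
   set (64BIT, MMX, SSE through SSSE3, CX16, FXSR) plus SSE4.1, SSE4.2 and
   POPCNT, whereas -march=iamcu resolves to PROCESSOR_IAMCU with no PTA
   flags, so the table enables no optional ISA extensions for IA MCU.  */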
3451 /* -mrecip options. */
3452 static struct
3454 const char *string; /* option name */
3455 unsigned int mask; /* mask bits to set */
3457 const recip_options[] =
3459 { "all", RECIP_MASK_ALL },
3460 { "none", RECIP_MASK_NONE },
3461 { "div", RECIP_MASK_DIV },
3462 { "sqrt", RECIP_MASK_SQRT },
3463 { "vec-div", RECIP_MASK_VEC_DIV },
3464 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3467 int const pta_size = ARRAY_SIZE (processor_alias_table);
3469 /* Set up prefix/suffix so the error messages refer to either the command
3470 line argument, or the attribute(target). */
3471 if (main_args_p)
3473 prefix = "-m";
3474 suffix = "";
3475 sw = "switch";
3477 else
3479 prefix = "option(\"";
3480 suffix = "\")";
3481 sw = "attribute";
3484 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3485 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3486 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3487 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3488 #ifdef TARGET_BI_ARCH
3489 else
3491 #if TARGET_BI_ARCH == 1
3492 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3493 is on and OPTION_MASK_ABI_X32 is off. We turn off
3494 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3495 -mx32. */
3496 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3497 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3498 #else
3499 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3500 on and OPTION_MASK_ABI_64 is off. We turn off
3501 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3502 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3503 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3504 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3505 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3506 #endif
3507 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3508 && TARGET_IAMCU_P (opts->x_target_flags))
3509 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3510 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3512 #endif
3514 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3516 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3517 OPTION_MASK_ABI_64 for TARGET_X32. */
3518 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3519 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3521 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3522 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3523 | OPTION_MASK_ABI_X32
3524 | OPTION_MASK_ABI_64);
3525 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3527 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3528 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3529 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3530 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
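/* Net effect of the ABI adjustments above (sketch):

       -mx32 : OPTION_MASK_ISA_64BIT on, OPTION_MASK_ABI_64 off
       -m16  : ISA_64BIT, ABI_X32 and ABI_64 all cleared
       -m64  : OPTION_MASK_ISA_64BIT on, OPTION_MASK_ABI_X32 off  */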
3533 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3534 SUBTARGET_OVERRIDE_OPTIONS;
3535 #endif
3537 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3538 SUBSUBTARGET_OVERRIDE_OPTIONS;
3539 #endif
3541 /* -fPIC is the default for x86_64. */
3542 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3543 opts->x_flag_pic = 2;
3545 /* Need to check -mtune=generic first. */
3546 if (opts->x_ix86_tune_string)
3548 /* As special support for cross compilers we read -mtune=native
3549 as -mtune=generic. With native compilers we won't see the
3550 -mtune=native, as it was changed by the driver. */
3551 if (!strcmp (opts->x_ix86_tune_string, "native"))
3553 opts->x_ix86_tune_string = "generic";
3555 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3556 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3557 "%stune=k8%s or %stune=generic%s instead as appropriate",
3558 prefix, suffix, prefix, suffix, prefix, suffix);
3560 else
3562 if (opts->x_ix86_arch_string)
3563 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3564 if (!opts->x_ix86_tune_string)
3566 opts->x_ix86_tune_string
3567 = processor_target_table[TARGET_CPU_DEFAULT].name;
3568 ix86_tune_defaulted = 1;
3571 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3572 or defaulted. We need to use a sensible tune option. */
3573 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3575 opts->x_ix86_tune_string = "generic";
3579 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3580 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3582 /* rep; movq isn't available in 32-bit code. */
3583 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3584 opts->x_ix86_stringop_alg = no_stringop;
3587 if (!opts->x_ix86_arch_string)
3588 opts->x_ix86_arch_string
3589 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3590 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3591 else
3592 ix86_arch_specified = 1;
3594 if (opts_set->x_ix86_pmode)
3596 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3597 && opts->x_ix86_pmode == PMODE_SI)
3598 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3599 && opts->x_ix86_pmode == PMODE_DI))
3600 error ("address mode %qs not supported in the %s bit mode",
3601 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3602 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3604 else
3605 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3606 ? PMODE_DI : PMODE_SI;
3608 if (!opts_set->x_ix86_abi)
3609 opts->x_ix86_abi = DEFAULT_ABI;
3611 /* For targets using the MS ABI enable ms-extensions, if not
3612 explicitly turned off. For non-MS ABIs we turn this option
3613 off. */
3614 if (!opts_set->x_flag_ms_extensions)
3615 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3617 if (opts_set->x_ix86_cmodel)
3619 switch (opts->x_ix86_cmodel)
3621 case CM_SMALL:
3622 case CM_SMALL_PIC:
3623 if (opts->x_flag_pic)
3624 opts->x_ix86_cmodel = CM_SMALL_PIC;
3625 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3626 error ("code model %qs not supported in the %s bit mode",
3627 "small", "32");
3628 break;
3630 case CM_MEDIUM:
3631 case CM_MEDIUM_PIC:
3632 if (opts->x_flag_pic)
3633 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3634 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3635 error ("code model %qs not supported in the %s bit mode",
3636 "medium", "32");
3637 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3638 error ("code model %qs not supported in x32 mode",
3639 "medium");
3640 break;
3642 case CM_LARGE:
3643 case CM_LARGE_PIC:
3644 if (opts->x_flag_pic)
3645 opts->x_ix86_cmodel = CM_LARGE_PIC;
3646 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3647 error ("code model %qs not supported in the %s bit mode",
3648 "large", "32");
3649 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3650 error ("code model %qs not supported in x32 mode",
3651 "large");
3652 break;
3654 case CM_32:
3655 if (opts->x_flag_pic)
3656 error ("code model %s does not support PIC mode", "32");
3657 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3658 error ("code model %qs not supported in the %s bit mode",
3659 "32", "64");
3660 break;
3662 case CM_KERNEL:
3663 if (opts->x_flag_pic)
3665 error ("code model %s does not support PIC mode", "kernel");
3666 opts->x_ix86_cmodel = CM_32;
3668 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3669 error ("code model %qs not supported in the %s bit mode",
3670 "kernel", "32");
3671 break;
3673 default:
3674 gcc_unreachable ();
3677 else
3679 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3680 use of rip-relative addressing. This eliminates fixups that
3681 would otherwise be needed if this object is to be placed in a
3682 DLL, and is essentially just as efficient as direct addressing. */
3683 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3684 && (TARGET_RDOS || TARGET_PECOFF))
3685 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3686 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3687 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3688 else
3689 opts->x_ix86_cmodel = CM_32;
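/* Summary of the defaults chosen when -mcmodel= is not given:
   64-bit RDOS/PE-COFF targets get CM_MEDIUM_PIC (with -fpic forced on),
   other 64-bit targets get CM_SMALL_PIC when -fpic is in effect and
   CM_SMALL otherwise, and 32-bit targets always get CM_32.  */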
3691 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3693 error ("-masm=intel not supported in this configuration");
3694 opts->x_ix86_asm_dialect = ASM_ATT;
3696 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3697 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3698 sorry ("%i-bit mode not compiled in",
3699 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3701 for (i = 0; i < pta_size; i++)
3702 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3704 ix86_schedule = processor_alias_table[i].schedule;
3705 ix86_arch = processor_alias_table[i].processor;
3706 /* Default cpu tuning to the architecture. */
3707 ix86_tune = ix86_arch;
3709 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3710 && !(processor_alias_table[i].flags & PTA_64BIT))
3711 error ("CPU you selected does not support x86-64 "
3712 "instruction set");
3714 if (processor_alias_table[i].flags & PTA_MMX
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3717 if (processor_alias_table[i].flags & PTA_3DNOW
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3720 if (processor_alias_table[i].flags & PTA_3DNOW_A
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3723 if (processor_alias_table[i].flags & PTA_SSE
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3726 if (processor_alias_table[i].flags & PTA_SSE2
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3729 if (processor_alias_table[i].flags & PTA_SSE3
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3732 if (processor_alias_table[i].flags & PTA_SSSE3
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3735 if (processor_alias_table[i].flags & PTA_SSE4_1
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3738 if (processor_alias_table[i].flags & PTA_SSE4_2
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3741 if (processor_alias_table[i].flags & PTA_AVX
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3744 if (processor_alias_table[i].flags & PTA_AVX2
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3747 if (processor_alias_table[i].flags & PTA_FMA
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3750 if (processor_alias_table[i].flags & PTA_SSE4A
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3753 if (processor_alias_table[i].flags & PTA_FMA4
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3756 if (processor_alias_table[i].flags & PTA_XOP
3757 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3758 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3759 if (processor_alias_table[i].flags & PTA_LWP
3760 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3761 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3762 if (processor_alias_table[i].flags & PTA_ABM
3763 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3764 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3765 if (processor_alias_table[i].flags & PTA_BMI
3766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3768 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3771 if (processor_alias_table[i].flags & PTA_TBM
3772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3774 if (processor_alias_table[i].flags & PTA_BMI2
3775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3777 if (processor_alias_table[i].flags & PTA_CX16
3778 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3779 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3780 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3781 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3783 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3784 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3785 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3786 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3787 if (processor_alias_table[i].flags & PTA_MOVBE
3788 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3789 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3790 if (processor_alias_table[i].flags & PTA_AES
3791 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3792 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3793 if (processor_alias_table[i].flags & PTA_SHA
3794 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3795 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3796 if (processor_alias_table[i].flags & PTA_PCLMUL
3797 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3798 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3799 if (processor_alias_table[i].flags & PTA_FSGSBASE
3800 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3801 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3802 if (processor_alias_table[i].flags & PTA_RDRND
3803 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3804 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3805 if (processor_alias_table[i].flags & PTA_F16C
3806 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3807 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3808 if (processor_alias_table[i].flags & PTA_RTM
3809 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3810 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3811 if (processor_alias_table[i].flags & PTA_HLE
3812 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3813 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3814 if (processor_alias_table[i].flags & PTA_PRFCHW
3815 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3816 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3817 if (processor_alias_table[i].flags & PTA_RDSEED
3818 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3819 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3820 if (processor_alias_table[i].flags & PTA_ADX
3821 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3822 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3823 if (processor_alias_table[i].flags & PTA_FXSR
3824 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3825 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3826 if (processor_alias_table[i].flags & PTA_XSAVE
3827 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3828 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3829 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3830 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3831 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3832 if (processor_alias_table[i].flags & PTA_AVX512F
3833 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3834 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3835 if (processor_alias_table[i].flags & PTA_AVX512ER
3836 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3837 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3838 if (processor_alias_table[i].flags & PTA_AVX512PF
3839 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3840 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3841 if (processor_alias_table[i].flags & PTA_AVX512CD
3842 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3843 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3844 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3845 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3846 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3847 if (processor_alias_table[i].flags & PTA_PCOMMIT
3848 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3849 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3850 if (processor_alias_table[i].flags & PTA_CLWB
3851 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3852 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3853 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3854 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3855 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3856 if (processor_alias_table[i].flags & PTA_XSAVEC
3857 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3858 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3859 if (processor_alias_table[i].flags & PTA_XSAVES
3860 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3861 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3862 if (processor_alias_table[i].flags & PTA_AVX512DQ
3863 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3864 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3865 if (processor_alias_table[i].flags & PTA_AVX512BW
3866 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3867 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3868 if (processor_alias_table[i].flags & PTA_AVX512VL
3869 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3870 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3871 if (processor_alias_table[i].flags & PTA_MPX
3872 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3873 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3874 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3875 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3876 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3877 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3878 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3879 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3880 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3881 x86_prefetch_sse = true;
3882 if (processor_alias_table[i].flags & PTA_MWAITX
3883 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3884 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3886 break;
3889 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3890 error ("Intel MPX does not support x32");
3895 if (TARGET_IAMCU_P (opts->x_target_flags))
3897 /* Verify that x87/MMX/SSE/AVX is off for -miamcu. */
3898 if (TARGET_80387_P (opts->x_target_flags))
3899 sorry ("X87 FPU isn%'t supported in Intel MCU psABI");
3900 else if ((opts->x_ix86_isa_flags & (OPTION_MASK_ISA_MMX
3901 | OPTION_MASK_ISA_SSE
3902 | OPTION_MASK_ISA_AVX)))
3903 sorry ("%s isn%'t supported in Intel MCU psABI",
3904 TARGET_MMX_P (opts->x_ix86_isa_flags)
3905 ? "MMX"
3906 : TARGET_SSE_P (opts->x_ix86_isa_flags) ? "SSE" : "AVX");
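/* So an acceptable IA MCU invocation looks roughly like

       gcc -m32 -miamcu -march=iamcu -mno-80387 foo.c

   (illustrative; a dedicated IA MCU configuration may already default x87,
   MMX, SSE and AVX to off), while combining -miamcu with 64-bit or x32
   code, or with any of those ISAs, is rejected with a sorry () above.  */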
3909 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3910 error ("generic CPU can be used only for %stune=%s %s",
3911 prefix, suffix, sw);
3912 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3913 error ("intel CPU can be used only for %stune=%s %s",
3914 prefix, suffix, sw);
3915 else if (i == pta_size)
3916 error ("bad value (%s) for %sarch=%s %s",
3917 opts->x_ix86_arch_string, prefix, suffix, sw);
3919 ix86_arch_mask = 1u << ix86_arch;
3920 for (i = 0; i < X86_ARCH_LAST; ++i)
3921 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3923 for (i = 0; i < pta_size; i++)
3924 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3926 ix86_schedule = processor_alias_table[i].schedule;
3927 ix86_tune = processor_alias_table[i].processor;
3928 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3930 if (!(processor_alias_table[i].flags & PTA_64BIT))
3932 if (ix86_tune_defaulted)
3934 opts->x_ix86_tune_string = "x86-64";
3935 for (i = 0; i < pta_size; i++)
3936 if (! strcmp (opts->x_ix86_tune_string,
3937 processor_alias_table[i].name))
3938 break;
3939 ix86_schedule = processor_alias_table[i].schedule;
3940 ix86_tune = processor_alias_table[i].processor;
3942 else
3943 error ("CPU you selected does not support x86-64 "
3944 "instruction set");
3947 /* Intel CPUs have always interpreted SSE prefetch instructions as
3948 NOPs; so, we can enable SSE prefetch instructions even when
3949 -mtune (rather than -march) points us to a processor that has them.
3950 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3951 higher processors. */
3952 if (TARGET_CMOV
3953 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3954 x86_prefetch_sse = true;
3955 break;
3958 if (ix86_tune_specified && i == pta_size)
3959 error ("bad value (%s) for %stune=%s %s",
3960 opts->x_ix86_tune_string, prefix, suffix, sw);
3962 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3964 #ifndef USE_IX86_FRAME_POINTER
3965 #define USE_IX86_FRAME_POINTER 0
3966 #endif
3968 #ifndef USE_X86_64_FRAME_POINTER
3969 #define USE_X86_64_FRAME_POINTER 0
3970 #endif
3972 /* Set the default values for switches whose default depends on TARGET_64BIT
3973 in case they weren't overwritten by command line options. */
3974 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3976 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3977 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3978 if (opts->x_flag_asynchronous_unwind_tables
3979 && !opts_set->x_flag_unwind_tables
3980 && TARGET_64BIT_MS_ABI)
3981 opts->x_flag_unwind_tables = 1;
3982 if (opts->x_flag_asynchronous_unwind_tables == 2)
3983 opts->x_flag_unwind_tables
3984 = opts->x_flag_asynchronous_unwind_tables = 1;
3985 if (opts->x_flag_pcc_struct_return == 2)
3986 opts->x_flag_pcc_struct_return = 0;
3988 else
3990 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3991 opts->x_flag_omit_frame_pointer
3992 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3993 if (opts->x_flag_asynchronous_unwind_tables == 2)
3994 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3995 if (opts->x_flag_pcc_struct_return == 2)
3997 /* Intel MCU psABI specifies that -freg-struct-return should
3998 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
3999 we check -miamcu so that -freg-struct-return is always
4000 turned on if -miamcu is used. */
4001 if (TARGET_IAMCU_P (opts->x_target_flags))
4002 opts->x_flag_pcc_struct_return = 0;
4003 else
4004 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4008 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4009 /* TODO: ix86_cost should be chosen at instruction or function granularity
4010 so that for cold code we use size_cost even in !optimize_size compilations. */
4011 if (opts->x_optimize_size)
4012 ix86_cost = &ix86_size_cost;
4013 else
4014 ix86_cost = ix86_tune_cost;
4016 /* Arrange to set up i386_stack_locals for all functions. */
4017 init_machine_status = ix86_init_machine_status;
4019 /* Validate -mregparm= value. */
4020 if (opts_set->x_ix86_regparm)
4022 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4023 warning (0, "-mregparm is ignored in 64-bit mode");
4024 else if (TARGET_IAMCU_P (opts->x_target_flags))
4025 warning (0, "-mregparm is ignored for Intel MCU psABI");
4026 if (opts->x_ix86_regparm > REGPARM_MAX)
4028 error ("-mregparm=%d is not between 0 and %d",
4029 opts->x_ix86_regparm, REGPARM_MAX);
4030 opts->x_ix86_regparm = 0;
4033 if (TARGET_IAMCU_P (opts->x_target_flags)
4034 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4035 opts->x_ix86_regparm = REGPARM_MAX;
4037 /* Default align_* from the processor table. */
4038 ix86_default_align (opts);
4040 /* Provide default for -mbranch-cost= value. */
4041 if (!opts_set->x_ix86_branch_cost)
4042 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4044 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4046 opts->x_target_flags
4047 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4049 /* Enable by default the SSE and MMX builtins. Do allow the user to
4050 explicitly disable any of these. In particular, disabling SSE and
4051 MMX for kernel code is extremely useful. */
4052 if (!ix86_arch_specified)
4053 opts->x_ix86_isa_flags
4054 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4055 | TARGET_SUBTARGET64_ISA_DEFAULT)
4056 & ~opts->x_ix86_isa_flags_explicit);
4058 if (TARGET_RTD_P (opts->x_target_flags))
4059 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
4061 else
4063 opts->x_target_flags
4064 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4066 if (!ix86_arch_specified)
4067 opts->x_ix86_isa_flags
4068 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4070 /* The i386 ABI does not specify a red zone. It still makes sense to use one
4071 when the programmer takes care to keep the stack area from being destroyed. */
4072 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4073 opts->x_target_flags |= MASK_NO_RED_ZONE;
4076 /* Keep nonleaf frame pointers. */
4077 if (opts->x_flag_omit_frame_pointer)
4078 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4079 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4080 opts->x_flag_omit_frame_pointer = 1;
4082 /* If we're doing fast math, we don't care about comparison order
4083 wrt NaNs. This lets us use a shorter comparison sequence. */
4084 if (opts->x_flag_finite_math_only)
4085 opts->x_target_flags &= ~MASK_IEEE_FP;
4087 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4088 since the insns won't need emulation. */
4089 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4090 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4092 /* Likewise, if the target doesn't have a 387, or we've specified
4093 software floating point, don't use 387 inline intrinsics. */
4094 if (!TARGET_80387_P (opts->x_target_flags))
4095 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4097 /* Turn on MMX builtins for -msse. */
4098 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4099 opts->x_ix86_isa_flags
4100 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4102 /* Enable SSE prefetch. */
4103 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4104 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4105 x86_prefetch_sse = true;
4107 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4108 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4109 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4110 opts->x_ix86_isa_flags
4111 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4113 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4114 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4115 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4116 opts->x_ix86_isa_flags
4117 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4119 /* Enable lzcnt instruction for -mabm. */
4120 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
4121 opts->x_ix86_isa_flags
4122 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4124 /* Validate -mpreferred-stack-boundary= value or default it to
4125 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4126 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4127 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4129 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4130 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4131 int max = (TARGET_SEH ? 4 : 12);
4133 if (opts->x_ix86_preferred_stack_boundary_arg < min
4134 || opts->x_ix86_preferred_stack_boundary_arg > max)
4136 if (min == max)
4137 error ("-mpreferred-stack-boundary is not supported "
4138 "for this target");
4139 else
4140 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4141 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4143 else
4144 ix86_preferred_stack_boundary
4145 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
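/* For example, with BITS_PER_UNIT == 8:

       -mpreferred-stack-boundary=2  ->  (1 << 2) * 8 ==  32 bits ( 4 bytes)
       -mpreferred-stack-boundary=4  ->  (1 << 4) * 8 == 128 bits (16 bytes)

   The accepted argument range is [2, 12] for 32-bit code and [3, 12]
   (or [4, 12] once SSE is enabled) for 64-bit code, capped at 4 for SEH.  */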
4148 /* Set the default value for -mstackrealign. */
4149 if (opts->x_ix86_force_align_arg_pointer == -1)
4150 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4152 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4154 /* Validate -mincoming-stack-boundary= value or default it to
4155 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4156 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4157 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4159 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4160 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4162 if (opts->x_ix86_incoming_stack_boundary_arg < min
4163 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4164 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4165 opts->x_ix86_incoming_stack_boundary_arg, min);
4166 else
4168 ix86_user_incoming_stack_boundary
4169 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4170 ix86_incoming_stack_boundary
4171 = ix86_user_incoming_stack_boundary;
4175 #ifndef NO_PROFILE_COUNTERS
4176 if (flag_nop_mcount)
4177 error ("-mnop-mcount is not compatible with this target");
4178 #endif
4179 if (flag_nop_mcount && flag_pic)
4180 error ("-mnop-mcount is not implemented for -fPIC");
4182 /* Accept -msseregparm only if at least SSE support is enabled. */
4183 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4184 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4185 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4187 if (opts_set->x_ix86_fpmath)
4189 if (opts->x_ix86_fpmath & FPMATH_SSE)
4191 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4193 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4194 opts->x_ix86_fpmath = FPMATH_387;
4196 else if ((opts->x_ix86_fpmath & FPMATH_387)
4197 && !TARGET_80387_P (opts->x_target_flags))
4199 warning (0, "387 instruction set disabled, using SSE arithmetics");
4200 opts->x_ix86_fpmath = FPMATH_SSE;
4204 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4205 fpmath=387. The second is however default at many targets since the
4206 extra 80-bit precision of temporaries is considered to be part of the ABI.
4207 Overwrite the default at least for -ffast-math.
4208 TODO: -mfpmath=both seems to produce similarly performing code with
4209 slightly smaller binaries. It is however not clear whether register
4210 allocation is ready for this setting.
4211 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4212 codegen. We may switch to 387 with -ffast-math for size optimized
4213 functions. */
4214 else if (fast_math_flags_set_p (&global_options)
4215 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4216 opts->x_ix86_fpmath = FPMATH_SSE;
4217 else
4218 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4220 /* If the i387 is disabled, then do not return values in it. */
4221 if (!TARGET_80387_P (opts->x_target_flags))
4222 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4224 /* Use external vectorized library in vectorizing intrinsics. */
4225 if (opts_set->x_ix86_veclibabi_type)
4226 switch (opts->x_ix86_veclibabi_type)
4228 case ix86_veclibabi_type_svml:
4229 ix86_veclib_handler = ix86_veclibabi_svml;
4230 break;
4232 case ix86_veclibabi_type_acml:
4233 ix86_veclib_handler = ix86_veclibabi_acml;
4234 break;
4236 default:
4237 gcc_unreachable ();
4240 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4241 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4242 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4244 /* If stack probes are required, the space used for large function
4245 arguments on the stack must also be probed, so enable
4246 -maccumulate-outgoing-args so this happens in the prologue. */
4247 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4248 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4250 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4251 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4252 "for correctness", prefix, suffix);
4253 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4256 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4258 char *p;
4259 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4260 p = strchr (internal_label_prefix, 'X');
4261 internal_label_prefix_len = p - internal_label_prefix;
4262 *p = '\0';
4265 /* When scheduling description is not available, disable scheduler pass
4266 so it won't slow down the compilation and make x87 code slower. */
4267 if (!TARGET_SCHEDULE)
4268 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4270 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4271 ix86_tune_cost->simultaneous_prefetches,
4272 opts->x_param_values,
4273 opts_set->x_param_values);
4274 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4275 ix86_tune_cost->prefetch_block,
4276 opts->x_param_values,
4277 opts_set->x_param_values);
4278 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4279 ix86_tune_cost->l1_cache_size,
4280 opts->x_param_values,
4281 opts_set->x_param_values);
4282 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4283 ix86_tune_cost->l2_cache_size,
4284 opts->x_param_values,
4285 opts_set->x_param_values);
4287 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4288 if (opts->x_flag_prefetch_loop_arrays < 0
4289 && HAVE_prefetch
4290 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4291 && !opts->x_optimize_size
4292 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4293 opts->x_flag_prefetch_loop_arrays = 1;
4295 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4296 can be optimized to ap = __builtin_next_arg (0). */
4297 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4298 targetm.expand_builtin_va_start = NULL;
4300 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4302 ix86_gen_leave = gen_leave_rex64;
4303 if (Pmode == DImode)
4305 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4306 ix86_gen_tls_local_dynamic_base_64
4307 = gen_tls_local_dynamic_base_64_di;
4309 else
4311 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4312 ix86_gen_tls_local_dynamic_base_64
4313 = gen_tls_local_dynamic_base_64_si;
4316 else
4317 ix86_gen_leave = gen_leave;
4319 if (Pmode == DImode)
4321 ix86_gen_add3 = gen_adddi3;
4322 ix86_gen_sub3 = gen_subdi3;
4323 ix86_gen_sub3_carry = gen_subdi3_carry;
4324 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4325 ix86_gen_andsp = gen_anddi3;
4326 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4327 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4328 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4329 ix86_gen_monitor = gen_sse3_monitor_di;
4330 ix86_gen_monitorx = gen_monitorx_di;
4332 else
4334 ix86_gen_add3 = gen_addsi3;
4335 ix86_gen_sub3 = gen_subsi3;
4336 ix86_gen_sub3_carry = gen_subsi3_carry;
4337 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4338 ix86_gen_andsp = gen_andsi3;
4339 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4340 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4341 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4342 ix86_gen_monitor = gen_sse3_monitor_si;
4343 ix86_gen_monitorx = gen_monitorx_si;
4346 #ifdef USE_IX86_CLD
4347 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4348 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4349 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4350 #endif
4352 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4354 if (opts->x_flag_fentry > 0)
4355 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4356 "with -fpic");
4357 opts->x_flag_fentry = 0;
4359 else if (TARGET_SEH)
4361 if (opts->x_flag_fentry == 0)
4362 sorry ("-mno-fentry isn%'t compatible with SEH");
4363 opts->x_flag_fentry = 1;
4365 else if (opts->x_flag_fentry < 0)
4367 #if defined(PROFILE_BEFORE_PROLOGUE)
4368 opts->x_flag_fentry = 1;
4369 #else
4370 opts->x_flag_fentry = 0;
4371 #endif
4374 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4375 opts->x_target_flags |= MASK_VZEROUPPER;
4376 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4377 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4378 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4379 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4380 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4381 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4382 /* Enable 128-bit AVX instruction generation
4383 for the auto-vectorizer. */
4384 if (TARGET_AVX128_OPTIMAL
4385 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4386 opts->x_target_flags |= MASK_PREFER_AVX128;
4388 if (opts->x_ix86_recip_name)
4390 char *p = ASTRDUP (opts->x_ix86_recip_name);
4391 char *q;
4392 unsigned int mask, i;
4393 bool invert;
4395 while ((q = strtok (p, ",")) != NULL)
4397 p = NULL;
4398 if (*q == '!')
4400 invert = true;
4401 q++;
4403 else
4404 invert = false;
4406 if (!strcmp (q, "default"))
4407 mask = RECIP_MASK_ALL;
4408 else
4410 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4411 if (!strcmp (q, recip_options[i].string))
4413 mask = recip_options[i].mask;
4414 break;
4417 if (i == ARRAY_SIZE (recip_options))
4419 error ("unknown option for -mrecip=%s", q);
4420 invert = false;
4421 mask = RECIP_MASK_NONE;
4425 opts->x_recip_mask_explicit |= mask;
4426 if (invert)
4427 opts->x_recip_mask &= ~mask;
4428 else
4429 opts->x_recip_mask |= mask;
4433 if (TARGET_RECIP_P (opts->x_target_flags))
4434 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4435 else if (opts_set->x_target_flags & MASK_RECIP)
4436 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
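/* Illustrative: -mrecip=all,!sqrt first ORs RECIP_MASK_ALL into
   recip_mask and then clears RECIP_MASK_SQRT, while a bare -mrecip
   (TARGET_RECIP_P) turns on every RECIP_MASK_ALL bit that was not
   explicitly controlled through -mrecip=.  */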
4438 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4439 for 64-bit Bionic. Also default long double to 64-bit for Intel
4440 MCU psABI. */
4441 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4442 && !(opts_set->x_target_flags
4443 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4444 opts->x_target_flags |= (TARGET_64BIT
4445 ? MASK_LONG_DOUBLE_128
4446 : MASK_LONG_DOUBLE_64);
4448 /* Only one of them can be active. */
4449 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4450 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4452 /* Save the initial options in case the user does function specific
4453 options. */
4454 if (main_args_p)
4455 target_option_default_node = target_option_current_node
4456 = build_target_option_node (opts);
4458 /* Handle stack protector */
4459 if (!opts_set->x_ix86_stack_protector_guard)
4460 opts->x_ix86_stack_protector_guard
4461 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4463 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4464 if (opts->x_ix86_tune_memcpy_strategy)
4466 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4467 ix86_parse_stringop_strategy_string (str, false);
4468 free (str);
4471 if (opts->x_ix86_tune_memset_strategy)
4473 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4474 ix86_parse_stringop_strategy_string (str, true);
4475 free (str);
4479 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4481 static void
4482 ix86_option_override (void)
4484 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4485 struct register_pass_info insert_vzeroupper_info
4486 = { pass_insert_vzeroupper, "reload",
4487 1, PASS_POS_INSERT_AFTER
4490 ix86_option_override_internal (true, &global_options, &global_options_set);
4493 /* This needs to be done at start up. It's convenient to do it here. */
4494 register_pass (&insert_vzeroupper_info);
4497 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4498 static char *
4499 ix86_offload_options (void)
4501 if (TARGET_LP64)
4502 return xstrdup ("-foffload-abi=lp64");
4503 return xstrdup ("-foffload-abi=ilp32");
4506 /* Update register usage after having seen the compiler flags. */
4508 static void
4509 ix86_conditional_register_usage (void)
4511 int i, c_mask;
4513 /* For 32-bit targets, squash the REX registers. */
4514 if (! TARGET_64BIT)
4516 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4517 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4518 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4519 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4520 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4521 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4524 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4525 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4526 : TARGET_64BIT ? (1 << 2)
4527 : (1 << 1));
4529 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4531 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4533 /* Set/reset conditionally defined registers from
4534 CALL_USED_REGISTERS initializer. */
4535 if (call_used_regs[i] > 1)
4536 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4538 /* Calculate registers of CLOBBERED_REGS register set
4539 as call used registers from GENERAL_REGS register set. */
4540 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4541 && call_used_regs[i])
4542 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4545 /* If MMX is disabled, squash the registers. */
4546 if (! TARGET_MMX)
4547 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4548 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4549 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4551 /* If SSE is disabled, squash the registers. */
4552 if (! TARGET_SSE)
4553 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4554 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4555 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4557 /* If the FPU is disabled, squash the registers. */
4558 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4559 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4560 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4561 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4563 /* If AVX512F is disabled, squash the registers. */
4564 if (! TARGET_AVX512F)
4566 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4567 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4569 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4570 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4573 /* If MPX is disabled, squash the registers. */
4574 if (! TARGET_MPX)
4575 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4576 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4580 /* Save the current options */
4582 static void
4583 ix86_function_specific_save (struct cl_target_option *ptr,
4584 struct gcc_options *opts)
4586 ptr->arch = ix86_arch;
4587 ptr->schedule = ix86_schedule;
4588 ptr->prefetch_sse = x86_prefetch_sse;
4589 ptr->tune = ix86_tune;
4590 ptr->branch_cost = ix86_branch_cost;
4591 ptr->tune_defaulted = ix86_tune_defaulted;
4592 ptr->arch_specified = ix86_arch_specified;
4593 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4594 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4595 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4596 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4597 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4598 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4599 ptr->x_ix86_abi = opts->x_ix86_abi;
4600 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4601 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4602 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4603 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4604 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4605 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4606 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4607 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4608 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4609 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4610 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4611 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4612 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4613 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4614 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4615 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4616 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4617 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4618 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4619 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4621 /* The fields are char but the variables are not; make sure the
4622 values fit in the fields. */
4623 gcc_assert (ptr->arch == ix86_arch);
4624 gcc_assert (ptr->schedule == ix86_schedule);
4625 gcc_assert (ptr->tune == ix86_tune);
4626 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4629 /* Restore the current options */
4631 static void
4632 ix86_function_specific_restore (struct gcc_options *opts,
4633 struct cl_target_option *ptr)
4635 enum processor_type old_tune = ix86_tune;
4636 enum processor_type old_arch = ix86_arch;
4637 unsigned int ix86_arch_mask;
4638 int i;
4640 /* We don't change -fPIC. */
4641 opts->x_flag_pic = flag_pic;
4643 ix86_arch = (enum processor_type) ptr->arch;
4644 ix86_schedule = (enum attr_cpu) ptr->schedule;
4645 ix86_tune = (enum processor_type) ptr->tune;
4646 x86_prefetch_sse = ptr->prefetch_sse;
4647 opts->x_ix86_branch_cost = ptr->branch_cost;
4648 ix86_tune_defaulted = ptr->tune_defaulted;
4649 ix86_arch_specified = ptr->arch_specified;
4650 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4651 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4652 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4653 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4654 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4655 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4656 opts->x_ix86_abi = ptr->x_ix86_abi;
4657 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4658 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4659 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4660 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4661 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4662 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4663 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4664 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4665 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4666 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4667 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4668 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4669 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4670 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4671 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4672 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4673 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4674 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4675 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4676 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4677 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4678 /* TODO: ix86_cost should be chosen at instruction or function granularity
4679 so that for cold code we use size_cost even in !optimize_size compilation. */
4680 if (opts->x_optimize_size)
4681 ix86_cost = &ix86_size_cost;
4682 else
4683 ix86_cost = ix86_tune_cost;
4685 /* Recreate the arch feature tests if the arch changed */
4686 if (old_arch != ix86_arch)
4688 ix86_arch_mask = 1u << ix86_arch;
4689 for (i = 0; i < X86_ARCH_LAST; ++i)
4690 ix86_arch_features[i]
4691 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4694 /* Recreate the tune optimization tests */
4695 if (old_tune != ix86_tune)
4696 set_ix86_tune_features (ix86_tune, false);
4699 /* Adjust target options after streaming them in. This is mainly about
4700 reconciling them with global options. */
4702 static void
4703 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4705 /* flag_pic is a global option, but ix86_cmodel is target saved option
4706 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4707 for PIC, or error out. */
4708 if (flag_pic)
4709 switch (ptr->x_ix86_cmodel)
4711 case CM_SMALL:
4712 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4713 break;
4715 case CM_MEDIUM:
4716 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4717 break;
4719 case CM_LARGE:
4720 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4721 break;
4723 case CM_KERNEL:
4724 error ("code model %s does not support PIC mode", "kernel");
4725 break;
4727 default:
4728 break;
4730 else
4731 switch (ptr->x_ix86_cmodel)
4733 case CM_SMALL_PIC:
4734 ptr->x_ix86_cmodel = CM_SMALL;
4735 break;
4737 case CM_MEDIUM_PIC:
4738 ptr->x_ix86_cmodel = CM_MEDIUM;
4739 break;
4741 case CM_LARGE_PIC:
4742 ptr->x_ix86_cmodel = CM_LARGE;
4743 break;
4745 default:
4746 break;
4750 /* Print the current options */
4752 static void
4753 ix86_function_specific_print (FILE *file, int indent,
4754 struct cl_target_option *ptr)
4756 char *target_string
4757 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4758 NULL, NULL, ptr->x_ix86_fpmath, false);
4760 gcc_assert (ptr->arch < PROCESSOR_max);
4761 fprintf (file, "%*sarch = %d (%s)\n",
4762 indent, "",
4763 ptr->arch, processor_target_table[ptr->arch].name);
4765 gcc_assert (ptr->tune < PROCESSOR_max);
4766 fprintf (file, "%*stune = %d (%s)\n",
4767 indent, "",
4768 ptr->tune, processor_target_table[ptr->tune].name);
4770 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4772 if (target_string)
4774 fprintf (file, "%*s%s\n", indent, "", target_string);
4775 free (target_string);
4780 /* Inner function to process the attribute((target(...))), take an argument and
4781 set the current options from the argument. If we have a list, recursively go
4782 over the list. */
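/* A hedged usage sketch (editorial, not from the original source): the
   strings this parser accepts mirror the -m options, so an attribute such as

     __attribute__ ((target ("avx2,no-fma,arch=haswell,fpmath=sse")))

   would be split on commas below and dispatched as two isa options ("avx2",
   "no-fma"), one string option ("arch=") and one enum option ("fpmath=");
   "haswell" is only an example value and is validated elsewhere.  */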
4784 static bool
4785 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4786 struct gcc_options *opts,
4787 struct gcc_options *opts_set,
4788 struct gcc_options *enum_opts_set)
4790 char *next_optstr;
4791 bool ret = true;
4793 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4794 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4795 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4796 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4797 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4799 enum ix86_opt_type
4801 ix86_opt_unknown,
4802 ix86_opt_yes,
4803 ix86_opt_no,
4804 ix86_opt_str,
4805 ix86_opt_enum,
4806 ix86_opt_isa
4809 static const struct
4811 const char *string;
4812 size_t len;
4813 enum ix86_opt_type type;
4814 int opt;
4815 int mask;
4816 } attrs[] = {
4817 /* isa options */
4818 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4819 IX86_ATTR_ISA ("abm", OPT_mabm),
4820 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4821 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4822 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4823 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4824 IX86_ATTR_ISA ("aes", OPT_maes),
4825 IX86_ATTR_ISA ("sha", OPT_msha),
4826 IX86_ATTR_ISA ("avx", OPT_mavx),
4827 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4828 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4829 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4830 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4831 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4832 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4833 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4834 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4835 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4836 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4837 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4838 IX86_ATTR_ISA ("sse", OPT_msse),
4839 IX86_ATTR_ISA ("sse2", OPT_msse2),
4840 IX86_ATTR_ISA ("sse3", OPT_msse3),
4841 IX86_ATTR_ISA ("sse4", OPT_msse4),
4842 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4843 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4844 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4845 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4846 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4847 IX86_ATTR_ISA ("fma", OPT_mfma),
4848 IX86_ATTR_ISA ("xop", OPT_mxop),
4849 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4850 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4851 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4852 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4853 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4854 IX86_ATTR_ISA ("hle", OPT_mhle),
4855 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4856 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4857 IX86_ATTR_ISA ("adx", OPT_madx),
4858 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4859 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4860 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4861 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4862 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4863 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4864 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4865 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4866 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4867 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4868 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4869 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
4871 /* enum options */
4872 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4874 /* string options */
4875 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4876 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4878 /* flag options */
4879 IX86_ATTR_YES ("cld",
4880 OPT_mcld,
4881 MASK_CLD),
4883 IX86_ATTR_NO ("fancy-math-387",
4884 OPT_mfancy_math_387,
4885 MASK_NO_FANCY_MATH_387),
4887 IX86_ATTR_YES ("ieee-fp",
4888 OPT_mieee_fp,
4889 MASK_IEEE_FP),
4891 IX86_ATTR_YES ("inline-all-stringops",
4892 OPT_minline_all_stringops,
4893 MASK_INLINE_ALL_STRINGOPS),
4895 IX86_ATTR_YES ("inline-stringops-dynamically",
4896 OPT_minline_stringops_dynamically,
4897 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4899 IX86_ATTR_NO ("align-stringops",
4900 OPT_mno_align_stringops,
4901 MASK_NO_ALIGN_STRINGOPS),
4903 IX86_ATTR_YES ("recip",
4904 OPT_mrecip,
4905 MASK_RECIP),
4909 /* If this is a list, recurse to get the options. */
4910 if (TREE_CODE (args) == TREE_LIST)
4912 bool ret = true;
4914 for (; args; args = TREE_CHAIN (args))
4915 if (TREE_VALUE (args)
4916 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4917 p_strings, opts, opts_set,
4918 enum_opts_set))
4919 ret = false;
4921 return ret;
4924 else if (TREE_CODE (args) != STRING_CST)
4926 error ("attribute %<target%> argument not a string");
4927 return false;
4930 /* Handle multiple arguments separated by commas. */
4931 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4933 while (next_optstr && *next_optstr != '\0')
4935 char *p = next_optstr;
4936 char *orig_p = p;
4937 char *comma = strchr (next_optstr, ',');
4938 const char *opt_string;
4939 size_t len, opt_len;
4940 int opt;
4941 bool opt_set_p;
4942 char ch;
4943 unsigned i;
4944 enum ix86_opt_type type = ix86_opt_unknown;
4945 int mask = 0;
4947 if (comma)
4949 *comma = '\0';
4950 len = comma - next_optstr;
4951 next_optstr = comma + 1;
4953 else
4955 len = strlen (p);
4956 next_optstr = NULL;
4959 /* Recognize no-xxx. */
4960 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4962 opt_set_p = false;
4963 p += 3;
4964 len -= 3;
4966 else
4967 opt_set_p = true;
4969 /* Find the option. */
4970 ch = *p;
4971 opt = N_OPTS;
4972 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4974 type = attrs[i].type;
4975 opt_len = attrs[i].len;
4976 if (ch == attrs[i].string[0]
4977 && ((type != ix86_opt_str && type != ix86_opt_enum)
4978 ? len == opt_len
4979 : len > opt_len)
4980 && memcmp (p, attrs[i].string, opt_len) == 0)
4982 opt = attrs[i].opt;
4983 mask = attrs[i].mask;
4984 opt_string = attrs[i].string;
4985 break;
4989 /* Process the option. */
4990 if (opt == N_OPTS)
4992 error ("attribute(target(\"%s\")) is unknown", orig_p);
4993 ret = false;
4996 else if (type == ix86_opt_isa)
4998 struct cl_decoded_option decoded;
5000 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
5001 ix86_handle_option (opts, opts_set,
5002 &decoded, input_location);
5005 else if (type == ix86_opt_yes || type == ix86_opt_no)
5007 if (type == ix86_opt_no)
5008 opt_set_p = !opt_set_p;
5010 if (opt_set_p)
5011 opts->x_target_flags |= mask;
5012 else
5013 opts->x_target_flags &= ~mask;
5016 else if (type == ix86_opt_str)
5018 if (p_strings[opt])
5020 error ("option(\"%s\") was already specified", opt_string);
5021 ret = false;
5023 else
5024 p_strings[opt] = xstrdup (p + opt_len);
5027 else if (type == ix86_opt_enum)
5029 bool arg_ok;
5030 int value;
5032 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5033 if (arg_ok)
5034 set_option (opts, enum_opts_set, opt, value,
5035 p + opt_len, DK_UNSPECIFIED, input_location,
5036 global_dc);
5037 else
5039 error ("attribute(target(\"%s\")) is unknown", orig_p);
5040 ret = false;
5044 else
5045 gcc_unreachable ();
5048 return ret;
5051 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5053 tree
5054 ix86_valid_target_attribute_tree (tree args,
5055 struct gcc_options *opts,
5056 struct gcc_options *opts_set)
5058 const char *orig_arch_string = opts->x_ix86_arch_string;
5059 const char *orig_tune_string = opts->x_ix86_tune_string;
5060 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5061 int orig_tune_defaulted = ix86_tune_defaulted;
5062 int orig_arch_specified = ix86_arch_specified;
5063 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5064 tree t = NULL_TREE;
5065 int i;
5066 struct cl_target_option *def
5067 = TREE_TARGET_OPTION (target_option_default_node);
5068 struct gcc_options enum_opts_set;
5070 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5072 /* Process each of the options on the chain. */
5073 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5074 opts_set, &enum_opts_set))
5075 return error_mark_node;
5077 /* If the changed options are different from the default, rerun
5078 ix86_option_override_internal, and then save the options away.
5079 The string options are attribute options, and will be undone
5080 when we copy the save structure. */
5081 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5082 || opts->x_target_flags != def->x_target_flags
5083 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5084 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5085 || enum_opts_set.x_ix86_fpmath)
5087 /* If we are using the default tune= or arch=, undo the string assigned,
5088 and use the default. */
5089 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5090 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
5091 else if (!orig_arch_specified)
5092 opts->x_ix86_arch_string = NULL;
5094 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5095 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
5096 else if (orig_tune_defaulted)
5097 opts->x_ix86_tune_string = NULL;
5099 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5100 if (enum_opts_set.x_ix86_fpmath)
5101 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5102 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5103 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5105 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5106 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5109 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5110 ix86_option_override_internal (false, opts, opts_set);
5112 /* Add any builtin functions with the new isa if any. */
5113 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5115 /* Save the current options unless we are validating options for
5116 #pragma. */
5117 t = build_target_option_node (opts);
5119 opts->x_ix86_arch_string = orig_arch_string;
5120 opts->x_ix86_tune_string = orig_tune_string;
5121 opts_set->x_ix86_fpmath = orig_fpmath_set;
5123 /* Free up memory allocated to hold the strings */
5124 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5125 free (option_strings[i]);
5128 return t;
5131 /* Hook to validate attribute((target("string"))). */
5133 static bool
5134 ix86_valid_target_attribute_p (tree fndecl,
5135 tree ARG_UNUSED (name),
5136 tree args,
5137 int ARG_UNUSED (flags))
5139 struct gcc_options func_options;
5140 tree new_target, new_optimize;
5141 bool ret = true;
5143 /* attribute((target("default"))) does nothing, beyond
5144 affecting multi-versioning. */
5145 if (TREE_VALUE (args)
5146 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5147 && TREE_CHAIN (args) == NULL_TREE
5148 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5149 return true;
5151 tree old_optimize = build_optimization_node (&global_options);
5153 /* Get the optimization options of the current function. */
5154 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5156 if (!func_optimize)
5157 func_optimize = old_optimize;
5159 /* Init func_options. */
5160 memset (&func_options, 0, sizeof (func_options));
5161 init_options_struct (&func_options, NULL);
5162 lang_hooks.init_options_struct (&func_options);
5164 cl_optimization_restore (&func_options,
5165 TREE_OPTIMIZATION (func_optimize));
5167 /* Initialize func_options to the default before its target options can
5168 be set. */
5169 cl_target_option_restore (&func_options,
5170 TREE_TARGET_OPTION (target_option_default_node));
5172 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5173 &global_options_set);
5175 new_optimize = build_optimization_node (&func_options);
5177 if (new_target == error_mark_node)
5178 ret = false;
5180 else if (fndecl && new_target)
5182 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5184 if (old_optimize != new_optimize)
5185 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5188 return ret;
5192 /* Hook to determine if one function can safely inline another. */
5194 static bool
5195 ix86_can_inline_p (tree caller, tree callee)
5197 bool ret = false;
5198 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5199 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5201 /* If callee has no option attributes, then it is ok to inline. */
5202 if (!callee_tree)
5203 ret = true;
5205 /* If caller has no option attributes, but callee does then it is not ok to
5206 inline. */
5207 else if (!caller_tree)
5208 ret = false;
5210 else
5212 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5213 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5215 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5216 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5217 function. */
5218 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5219 != callee_opts->x_ix86_isa_flags)
5220 ret = false;
5222 /* See if we have the same non-isa options. */
5223 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5224 ret = false;
5226 /* See if arch, tune, etc. are the same. */
5227 else if (caller_opts->arch != callee_opts->arch)
5228 ret = false;
5230 else if (caller_opts->tune != callee_opts->tune)
5231 ret = false;
5233 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5234 ret = false;
5236 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5237 ret = false;
5239 else
5240 ret = true;
5243 return ret;
5247 /* Remember the last target of ix86_set_current_function. */
5248 static GTY(()) tree ix86_previous_fndecl;
5250 /* Set targets globals to the default (or current #pragma GCC target
5251 if active). Invalidate ix86_previous_fndecl cache. */
5253 void
5254 ix86_reset_previous_fndecl (void)
5256 tree new_tree = target_option_current_node;
5257 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5258 if (TREE_TARGET_GLOBALS (new_tree))
5259 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5260 else if (new_tree == target_option_default_node)
5261 restore_target_globals (&default_target_globals);
5262 else
5263 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5264 ix86_previous_fndecl = NULL_TREE;
5267 /* Establish appropriate back-end context for processing the function
5268 FNDECL. The argument might be NULL to indicate processing at top
5269 level, outside of any function scope. */
5270 static void
5271 ix86_set_current_function (tree fndecl)
5273 /* Only change the context if the function changes. This hook is called
5274 several times in the course of compiling a function, and we don't want to
5275 slow things down too much or call target_reinit when it isn't safe. */
5276 if (fndecl == ix86_previous_fndecl)
5277 return;
5279 tree old_tree;
5280 if (ix86_previous_fndecl == NULL_TREE)
5281 old_tree = target_option_current_node;
5282 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5283 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5284 else
5285 old_tree = target_option_default_node;
5287 if (fndecl == NULL_TREE)
5289 if (old_tree != target_option_current_node)
5290 ix86_reset_previous_fndecl ();
5291 return;
5294 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5295 if (new_tree == NULL_TREE)
5296 new_tree = target_option_default_node;
5298 if (old_tree != new_tree)
5300 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5301 if (TREE_TARGET_GLOBALS (new_tree))
5302 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5303 else if (new_tree == target_option_default_node)
5304 restore_target_globals (&default_target_globals);
5305 else
5306 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5308 ix86_previous_fndecl = fndecl;
5312 /* Return true if this goes in large data/bss. */
5314 static bool
5315 ix86_in_large_data_p (tree exp)
5317 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5318 return false;
5320 /* Functions are never large data. */
5321 if (TREE_CODE (exp) == FUNCTION_DECL)
5322 return false;
5324 /* Automatic variables are never large data. */
5325 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5326 return false;
5328 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5330 const char *section = DECL_SECTION_NAME (exp);
5331 if (strcmp (section, ".ldata") == 0
5332 || strcmp (section, ".lbss") == 0)
5333 return true;
5334 return false;
5336 else
5338 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5340 /* If this is an incomplete type with size 0, then we can't put it
5341 in data because it might be too big when completed. Also,
5342 int_size_in_bytes returns -1 if size can vary or is larger than
5343 an integer in which case also it is safer to assume that it goes in
5344 large data. */
5345 if (size <= 0 || size > ix86_section_threshold)
5346 return true;
5349 return false;
5352 /* Switch to the appropriate section for output of DECL.
5353 DECL is either a `VAR_DECL' node or a constant of some sort.
5354 RELOC indicates whether forming the initial value of DECL requires
5355 link-time relocations. */
5357 ATTRIBUTE_UNUSED static section *
5358 x86_64_elf_select_section (tree decl, int reloc,
5359 unsigned HOST_WIDE_INT align)
5361 if (ix86_in_large_data_p (decl))
5363 const char *sname = NULL;
5364 unsigned int flags = SECTION_WRITE;
5365 switch (categorize_decl_for_section (decl, reloc))
5367 case SECCAT_DATA:
5368 sname = ".ldata";
5369 break;
5370 case SECCAT_DATA_REL:
5371 sname = ".ldata.rel";
5372 break;
5373 case SECCAT_DATA_REL_LOCAL:
5374 sname = ".ldata.rel.local";
5375 break;
5376 case SECCAT_DATA_REL_RO:
5377 sname = ".ldata.rel.ro";
5378 break;
5379 case SECCAT_DATA_REL_RO_LOCAL:
5380 sname = ".ldata.rel.ro.local";
5381 break;
5382 case SECCAT_BSS:
5383 sname = ".lbss";
5384 flags |= SECTION_BSS;
5385 break;
5386 case SECCAT_RODATA:
5387 case SECCAT_RODATA_MERGE_STR:
5388 case SECCAT_RODATA_MERGE_STR_INIT:
5389 case SECCAT_RODATA_MERGE_CONST:
5390 sname = ".lrodata";
5391 flags = 0;
5392 break;
5393 case SECCAT_SRODATA:
5394 case SECCAT_SDATA:
5395 case SECCAT_SBSS:
5396 gcc_unreachable ();
5397 case SECCAT_TEXT:
5398 case SECCAT_TDATA:
5399 case SECCAT_TBSS:
5400 /* We don't split these for the medium model. Place them into
5401 default sections and hope for the best. */
5402 break;
5404 if (sname)
5406 /* We might get called with string constants, but get_named_section
5407 doesn't like them as they are not DECLs. Also, we need to set
5408 flags in that case. */
5409 if (!DECL_P (decl))
5410 return get_section (sname, flags, NULL);
5411 return get_named_section (decl, sname, reloc);
5414 return default_elf_select_section (decl, reloc, align);
5417 /* Select a set of attributes for section NAME based on the properties
5418 of DECL and whether or not RELOC indicates that DECL's initializer
5419 might contain runtime relocations. */
5421 static unsigned int ATTRIBUTE_UNUSED
5422 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5424 unsigned int flags = default_section_type_flags (decl, name, reloc);
5426 if (decl == NULL_TREE
5427 && (strcmp (name, ".ldata.rel.ro") == 0
5428 || strcmp (name, ".ldata.rel.ro.local") == 0))
5429 flags |= SECTION_RELRO;
5431 if (strcmp (name, ".lbss") == 0
5432 || strncmp (name, ".lbss.", 5) == 0
5433 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5434 flags |= SECTION_BSS;
5436 return flags;
5439 /* Build up a unique section name, expressed as a
5440 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5441 RELOC indicates whether the initial value of EXP requires
5442 link-time relocations. */
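/* Example (a sketch, not from the original source): for a large-data
   variable "big_table" the code below picks the ".ldata" prefix and builds
   the section name ".ldata.big_table", or ".gnu.linkonce.ld.big_table" when
   the decl is one-only and COMDAT groups are not available.  */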
5444 static void ATTRIBUTE_UNUSED
5445 x86_64_elf_unique_section (tree decl, int reloc)
5447 if (ix86_in_large_data_p (decl))
5449 const char *prefix = NULL;
5450 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5451 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5453 switch (categorize_decl_for_section (decl, reloc))
5455 case SECCAT_DATA:
5456 case SECCAT_DATA_REL:
5457 case SECCAT_DATA_REL_LOCAL:
5458 case SECCAT_DATA_REL_RO:
5459 case SECCAT_DATA_REL_RO_LOCAL:
5460 prefix = one_only ? ".ld" : ".ldata";
5461 break;
5462 case SECCAT_BSS:
5463 prefix = one_only ? ".lb" : ".lbss";
5464 break;
5465 case SECCAT_RODATA:
5466 case SECCAT_RODATA_MERGE_STR:
5467 case SECCAT_RODATA_MERGE_STR_INIT:
5468 case SECCAT_RODATA_MERGE_CONST:
5469 prefix = one_only ? ".lr" : ".lrodata";
5470 break;
5471 case SECCAT_SRODATA:
5472 case SECCAT_SDATA:
5473 case SECCAT_SBSS:
5474 gcc_unreachable ();
5475 case SECCAT_TEXT:
5476 case SECCAT_TDATA:
5477 case SECCAT_TBSS:
5478 /* We don't split these for the medium model. Place them into
5479 default sections and hope for the best. */
5480 break;
5482 if (prefix)
5484 const char *name, *linkonce;
5485 char *string;
5487 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5488 name = targetm.strip_name_encoding (name);
5490 /* If we're using one_only, then there needs to be a .gnu.linkonce
5491 prefix to the section name. */
5492 linkonce = one_only ? ".gnu.linkonce" : "";
5494 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5496 set_decl_section_name (decl, string);
5497 return;
5500 default_unique_section (decl, reloc);
5503 #ifdef COMMON_ASM_OP
5504 /* This says how to output assembler code to declare an
5505 uninitialized external linkage data object.
5507 For medium model x86-64 we need to use .largecomm opcode for
5508 large objects. */
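/* For instance (illustrative, not part of the original source), a 1 MiB
   common object "big_buf" under -mcmodel=medium with the default
   -mlarge-data-threshold would be emitted by the function below as

     .largecomm big_buf,1048576,32

   (the last field being the alignment in bytes), while small objects keep
   the ordinary COMMON_ASM_OP directive.  */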
5509 void
5510 x86_elf_aligned_common (FILE *file,
5511 const char *name, unsigned HOST_WIDE_INT size,
5512 int align)
5514 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5515 && size > (unsigned int)ix86_section_threshold)
5516 fputs ("\t.largecomm\t", file);
5517 else
5518 fputs (COMMON_ASM_OP, file);
5519 assemble_name (file, name);
5520 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5521 size, align / BITS_PER_UNIT);
5523 #endif
5525 /* Utility function for targets to use in implementing
5526 ASM_OUTPUT_ALIGNED_BSS. */
5528 void
5529 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5530 unsigned HOST_WIDE_INT size, int align)
5532 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5533 && size > (unsigned int)ix86_section_threshold)
5534 switch_to_section (get_named_section (decl, ".lbss", 0));
5535 else
5536 switch_to_section (bss_section);
5537 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5538 #ifdef ASM_DECLARE_OBJECT_NAME
5539 last_assemble_variable_decl = decl;
5540 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5541 #else
5542 /* Standard thing is just output label for the object. */
5543 ASM_OUTPUT_LABEL (file, name);
5544 #endif /* ASM_DECLARE_OBJECT_NAME */
5545 ASM_OUTPUT_SKIP (file, size ? size : 1);
5548 /* Decide whether we must probe the stack before any space allocation
5549 on this target. It's essentially TARGET_STACK_PROBE except when
5550 -fstack-check causes the stack to be already probed differently. */
5552 bool
5553 ix86_target_stack_probe (void)
5555 /* Do not probe the stack twice if static stack checking is enabled. */
5556 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5557 return false;
5559 return TARGET_STACK_PROBE;
5562 /* Decide whether we can make a sibling call to a function. DECL is the
5563 declaration of the function being targeted by the call and EXP is the
5564 CALL_EXPR representing the call. */
5566 static bool
5567 ix86_function_ok_for_sibcall (tree decl, tree exp)
5569 tree type, decl_or_type;
5570 rtx a, b;
5572 /* If we are generating position-independent code, we cannot sibcall
5573 optimize direct calls to global functions, as the PLT requires
5574 %ebx be live. (Darwin does not have a PLT.) */
5575 if (!TARGET_MACHO
5576 && !TARGET_64BIT
5577 && flag_pic
5578 && flag_plt
5579 && decl && !targetm.binds_local_p (decl))
5580 return false;
5582 /* If we need to align the outgoing stack, then sibcalling would
5583 unalign the stack, which may break the called function. */
5584 if (ix86_minimum_incoming_stack_boundary (true)
5585 < PREFERRED_STACK_BOUNDARY)
5586 return false;
5588 if (decl)
5590 decl_or_type = decl;
5591 type = TREE_TYPE (decl);
5593 else
5595 /* We're looking at the CALL_EXPR, we need the type of the function. */
5596 type = CALL_EXPR_FN (exp); /* pointer expression */
5597 type = TREE_TYPE (type); /* pointer type */
5598 type = TREE_TYPE (type); /* function type */
5599 decl_or_type = type;
5602 /* Check that the return value locations are the same. Like
5603 if we are returning floats on the 80387 register stack, we cannot
5604 make a sibcall from a function that doesn't return a float to a
5605 function that does or, conversely, from a function that does return
5606 a float to a function that doesn't; the necessary stack adjustment
5607 would not be executed. This is also the place we notice
5608 differences in the return value ABI. Note that it is ok for one
5609 of the functions to have void return type as long as the return
5610 value of the other is passed in a register. */
5611 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5612 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5613 cfun->decl, false);
5614 if (STACK_REG_P (a) || STACK_REG_P (b))
5616 if (!rtx_equal_p (a, b))
5617 return false;
5619 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5621 else if (!rtx_equal_p (a, b))
5622 return false;
5624 if (TARGET_64BIT)
5626 /* The SYSV ABI has more call-clobbered registers;
5627 disallow sibcalls from MS to SYSV. */
5628 if (cfun->machine->call_abi == MS_ABI
5629 && ix86_function_type_abi (type) == SYSV_ABI)
5630 return false;
5632 else
5634 /* If this call is indirect, we'll need to be able to use a
5635 call-clobbered register for the address of the target function.
5636 Make sure that all such registers are not used for passing
5637 parameters. Note that DLLIMPORT functions are indirect. */
5638 if (!decl
5639 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5641 if (ix86_function_regparm (type, NULL) >= 3)
5643 /* ??? Need to count the actual number of registers to be used,
5644 not the possible number of registers. Fix later. */
5645 return false;
5650 /* Otherwise okay. That also includes certain types of indirect calls. */
5651 return true;
5654 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5655 and "sseregparm" calling convention attributes;
5656 arguments as in struct attribute_spec.handler. */
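/* For illustration only (editorial, not part of the original source):
   combinations the handler below rejects or warns about include

     void __attribute__ ((fastcall, regparm (2))) f (int);   error, not compatible
     int  __attribute__ ((regparm (8))) g (int);             warning, if 8 > REGPARM_MAX

   where the second example assumes a value larger than REGPARM_MAX for the
   current (32-bit) target.  */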
5658 static tree
5659 ix86_handle_cconv_attribute (tree *node, tree name,
5660 tree args,
5661 int,
5662 bool *no_add_attrs)
5664 if (TREE_CODE (*node) != FUNCTION_TYPE
5665 && TREE_CODE (*node) != METHOD_TYPE
5666 && TREE_CODE (*node) != FIELD_DECL
5667 && TREE_CODE (*node) != TYPE_DECL)
5669 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5670 name);
5671 *no_add_attrs = true;
5672 return NULL_TREE;
5675 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5676 if (is_attribute_p ("regparm", name))
5678 tree cst;
5680 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5682 error ("fastcall and regparm attributes are not compatible");
5685 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5687 error ("regparam and thiscall attributes are not compatible");
5690 cst = TREE_VALUE (args);
5691 if (TREE_CODE (cst) != INTEGER_CST)
5693 warning (OPT_Wattributes,
5694 "%qE attribute requires an integer constant argument",
5695 name);
5696 *no_add_attrs = true;
5698 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5700 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5701 name, REGPARM_MAX);
5702 *no_add_attrs = true;
5705 return NULL_TREE;
5708 if (TARGET_64BIT)
5710 /* Do not warn when emulating the MS ABI. */
5711 if ((TREE_CODE (*node) != FUNCTION_TYPE
5712 && TREE_CODE (*node) != METHOD_TYPE)
5713 || ix86_function_type_abi (*node) != MS_ABI)
5714 warning (OPT_Wattributes, "%qE attribute ignored",
5715 name);
5716 *no_add_attrs = true;
5717 return NULL_TREE;
5720 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5721 if (is_attribute_p ("fastcall", name))
5723 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5725 error ("fastcall and cdecl attributes are not compatible");
5727 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5729 error ("fastcall and stdcall attributes are not compatible");
5731 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5733 error ("fastcall and regparm attributes are not compatible");
5735 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5737 error ("fastcall and thiscall attributes are not compatible");
5741 /* Can combine stdcall with fastcall (redundant), regparm and
5742 sseregparm. */
5743 else if (is_attribute_p ("stdcall", name))
5745 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5747 error ("stdcall and cdecl attributes are not compatible");
5749 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5751 error ("stdcall and fastcall attributes are not compatible");
5753 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5755 error ("stdcall and thiscall attributes are not compatible");
5759 /* Can combine cdecl with regparm and sseregparm. */
5760 else if (is_attribute_p ("cdecl", name))
5762 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5764 error ("stdcall and cdecl attributes are not compatible");
5766 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5768 error ("fastcall and cdecl attributes are not compatible");
5770 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5772 error ("cdecl and thiscall attributes are not compatible");
5775 else if (is_attribute_p ("thiscall", name))
5777 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5778 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5779 name);
5780 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5782 error ("stdcall and thiscall attributes are not compatible");
5784 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5786 error ("fastcall and thiscall attributes are not compatible");
5788 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5790 error ("cdecl and thiscall attributes are not compatible");
5794 /* Can combine sseregparm with all attributes. */
5796 return NULL_TREE;
5799 /* The transactional memory builtins are implicitly regparm or fastcall
5800 depending on the ABI. Override the generic do-nothing attribute that
5801 these builtins were declared with, and replace it with one of the two
5802 attributes that we expect elsewhere. */
5804 static tree
5805 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5806 int flags, bool *no_add_attrs)
5808 tree alt;
5810 /* In no case do we want to add the placeholder attribute. */
5811 *no_add_attrs = true;
5813 /* The 64-bit ABI is unchanged for transactional memory. */
5814 if (TARGET_64BIT)
5815 return NULL_TREE;
5817 /* ??? Is there a better way to validate 32-bit windows? We have
5818 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5819 if (CHECK_STACK_LIMIT > 0)
5820 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5821 else
5823 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5824 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5826 decl_attributes (node, alt, flags);
5828 return NULL_TREE;
5831 /* This function determines from TYPE the calling-convention. */
5833 unsigned int
5834 ix86_get_callcvt (const_tree type)
5836 unsigned int ret = 0;
5837 bool is_stdarg;
5838 tree attrs;
5840 if (TARGET_64BIT)
5841 return IX86_CALLCVT_CDECL;
5843 attrs = TYPE_ATTRIBUTES (type);
5844 if (attrs != NULL_TREE)
5846 if (lookup_attribute ("cdecl", attrs))
5847 ret |= IX86_CALLCVT_CDECL;
5848 else if (lookup_attribute ("stdcall", attrs))
5849 ret |= IX86_CALLCVT_STDCALL;
5850 else if (lookup_attribute ("fastcall", attrs))
5851 ret |= IX86_CALLCVT_FASTCALL;
5852 else if (lookup_attribute ("thiscall", attrs))
5853 ret |= IX86_CALLCVT_THISCALL;
5855 /* Regparm isn't allowed for thiscall and fastcall. */
5856 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5858 if (lookup_attribute ("regparm", attrs))
5859 ret |= IX86_CALLCVT_REGPARM;
5860 if (lookup_attribute ("sseregparm", attrs))
5861 ret |= IX86_CALLCVT_SSEREGPARM;
5864 if (IX86_BASE_CALLCVT(ret) != 0)
5865 return ret;
5868 is_stdarg = stdarg_p (type);
5869 if (TARGET_RTD && !is_stdarg)
5870 return IX86_CALLCVT_STDCALL | ret;
5872 if (ret != 0
5873 || is_stdarg
5874 || TREE_CODE (type) != METHOD_TYPE
5875 || ix86_function_type_abi (type) != MS_ABI)
5876 return IX86_CALLCVT_CDECL | ret;
5878 return IX86_CALLCVT_THISCALL;
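/* Illustrative notes (not from the original source): with -mrtd a
   fixed-argument function such as "int f (int)" gets IX86_CALLCVT_STDCALL
   from the code above, while a stdarg function such as printf stays
   IX86_CALLCVT_CDECL, since a varargs callee cannot know how many bytes to
   pop.  */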
5881 /* Return 0 if the attributes for two types are incompatible, 1 if they
5882 are compatible, and 2 if they are nearly compatible (which causes a
5883 warning to be generated). */
5885 static int
5886 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5888 unsigned int ccvt1, ccvt2;
5890 if (TREE_CODE (type1) != FUNCTION_TYPE
5891 && TREE_CODE (type1) != METHOD_TYPE)
5892 return 1;
5894 ccvt1 = ix86_get_callcvt (type1);
5895 ccvt2 = ix86_get_callcvt (type2);
5896 if (ccvt1 != ccvt2)
5897 return 0;
5898 if (ix86_function_regparm (type1, NULL)
5899 != ix86_function_regparm (type2, NULL))
5900 return 0;
5902 return 1;
5905 /* Return the regparm value for a function with the indicated TYPE and DECL.
5906 DECL may be NULL when calling function indirectly
5907 or considering a libcall. */
5909 static int
5910 ix86_function_regparm (const_tree type, const_tree decl)
5912 tree attr;
5913 int regparm;
5914 unsigned int ccvt;
5916 if (TARGET_64BIT)
5917 return (ix86_function_type_abi (type) == SYSV_ABI
5918 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5919 ccvt = ix86_get_callcvt (type);
5920 regparm = ix86_regparm;
5922 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5924 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5925 if (attr)
5927 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5928 return regparm;
5931 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5932 return 2;
5933 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5934 return 1;
5936 /* Use register calling convention for local functions when possible. */
5937 if (decl
5938 && TREE_CODE (decl) == FUNCTION_DECL)
5940 cgraph_node *target = cgraph_node::get (decl);
5941 if (target)
5942 target = target->function_symbol ();
5944 /* Caller and callee must agree on the calling convention, so
5945 checking just the optimize setting here means that with
5946 __attribute__((optimize (...))) the caller could use the regparm convention
5947 and the callee not, or vice versa. Instead look at whether the callee
5948 is optimized or not. */
5949 if (target && opt_for_fn (target->decl, optimize)
5950 && !(profile_flag && !flag_fentry))
5952 cgraph_local_info *i = &target->local;
5953 if (i && i->local && i->can_change_signature)
5955 int local_regparm, globals = 0, regno;
5957 /* Make sure no regparm register is taken by a
5958 fixed register variable. */
5959 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5960 local_regparm++)
5961 if (fixed_regs[local_regparm])
5962 break;
5964 /* We don't want to use regparm(3) for nested functions as
5965 these use a static chain pointer in the third argument. */
5966 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5967 local_regparm = 2;
5969 /* Save a register for the split stack. */
5970 if (local_regparm == 3 && flag_split_stack)
5971 local_regparm = 2;
5973 /* Each fixed register usage increases register pressure,
5974 so fewer registers should be used for argument passing.
5975 This functionality can be overridden by an explicit
5976 regparm value. */
5977 for (regno = AX_REG; regno <= DI_REG; regno++)
5978 if (fixed_regs[regno])
5979 globals++;
5981 local_regparm
5982 = globals < local_regparm ? local_regparm - globals : 0;
5984 if (local_regparm > regparm)
5985 regparm = local_regparm;
5990 return regparm;
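/* A worked example (sketch, not from the original source): for a local,
   optimized callee with no fixed registers the loop above runs to
   REGPARM_MAX, so up to three integer arguments go in registers; a nested
   function needing a static chain, or -fsplit-stack, drops that to two, and
   each fixed register in the AX..DI range (e.g. -ffixed-ecx) reduces the
   count further.  */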
5993 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5994 DFmode (2) arguments in SSE registers for a function with the
5995 indicated TYPE and DECL. DECL may be NULL when calling function
5996 indirectly or considering a libcall. Return -1 if any FP parameter
5997 should be rejected with an error. This is used in situations where we imply the
5998 SSE calling convention but the function is called from another function with
5999 SSE disabled. Otherwise return 0. */
6001 static int
6002 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6004 gcc_assert (!TARGET_64BIT);
6006 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6007 by the sseregparm attribute. */
6008 if (TARGET_SSEREGPARM
6009 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6011 if (!TARGET_SSE)
6013 if (warn)
6015 if (decl)
6016 error ("calling %qD with attribute sseregparm without "
6017 "SSE/SSE2 enabled", decl);
6018 else
6019 error ("calling %qT with attribute sseregparm without "
6020 "SSE/SSE2 enabled", type);
6022 return 0;
6025 return 2;
6028 if (!decl)
6029 return 0;
6031 cgraph_node *target = cgraph_node::get (decl);
6032 if (target)
6033 target = target->function_symbol ();
6035 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6036 (and DFmode for SSE2) arguments in SSE registers. */
6037 if (target
6038 /* TARGET_SSE_MATH */
6039 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6040 && opt_for_fn (target->decl, optimize)
6041 && !(profile_flag && !flag_fentry))
6043 cgraph_local_info *i = &target->local;
6044 if (i && i->local && i->can_change_signature)
6046 /* Refuse to produce wrong code when local function with SSE enabled
6047 is called from SSE disabled function.
6048 FIXME: We need a way to detect these cases across ltrans partitions
6049 and avoid using SSE calling conventions on local functions called
6050 from functions with SSE disabled. For now at least delay the
6051 warning until we know we are going to produce wrong code.
6052 See PR66047 */
6053 if (!TARGET_SSE && warn)
6054 return -1;
6055 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6056 ->x_ix86_isa_flags) ? 2 : 1;
6060 return 0;
6063 /* Return true if EAX is live at the start of the function. Used by
6064 ix86_expand_prologue to determine if we need special help before
6065 calling allocate_stack_worker. */
6067 static bool
6068 ix86_eax_live_at_start_p (void)
6070 /* Cheat. Don't bother working forward from ix86_function_regparm
6071 to the function type to whether an actual argument is located in
6072 eax. Instead just look at cfg info, which is still close enough
6073 to correct at this point. This gives false positives for broken
6074 functions that might use uninitialized data that happens to be
6075 allocated in eax, but who cares? */
6076 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6079 static bool
6080 ix86_keep_aggregate_return_pointer (tree fntype)
6082 tree attr;
6084 if (!TARGET_64BIT)
6086 attr = lookup_attribute ("callee_pop_aggregate_return",
6087 TYPE_ATTRIBUTES (fntype));
6088 if (attr)
6089 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6091 /* For 32-bit MS-ABI the default is to keep aggregate
6092 return pointer. */
6093 if (ix86_function_type_abi (fntype) == MS_ABI)
6094 return true;
6096 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6099 /* Value is the number of bytes of arguments automatically
6100 popped when returning from a subroutine call.
6101 FUNDECL is the declaration node of the function (as a tree),
6102 FUNTYPE is the data type of the function (as a tree),
6103 or for a library call it is an identifier node for the subroutine name.
6104 SIZE is the number of bytes of arguments passed on the stack.
6106 On the 80386, the RTD insn may be used to pop them if the number
6107 of args is fixed, but if the number is variable then the caller
6108 must pop them all. RTD can't be used for library calls now
6109 because the library is compiled with the Unix compiler.
6110 Use of RTD is a selectable option, since it is incompatible with
6111 standard Unix calling sequences. If the option is not selected,
6112 the caller must always pop the args.
6114 The attribute stdcall is equivalent to RTD on a per module basis. */
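/* A worked example (sketch, not from the original source): for

     void __attribute__ ((stdcall)) f (int a, int b);

   in 32-bit code the callee pops its own arguments, so the function below
   returns SIZE (8 here) and the call site omits the stack adjustment; a
   stdarg_p function keeps the caller-pops (cdecl) behaviour and 0 is
   returned instead.  */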
6116 static int
6117 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6119 unsigned int ccvt;
6121 /* None of the 64-bit ABIs pop arguments. */
6122 if (TARGET_64BIT)
6123 return 0;
6125 ccvt = ix86_get_callcvt (funtype);
6127 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6128 | IX86_CALLCVT_THISCALL)) != 0
6129 && ! stdarg_p (funtype))
6130 return size;
6132 /* Lose any fake structure return argument if it is passed on the stack. */
6133 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6134 && !ix86_keep_aggregate_return_pointer (funtype))
6136 int nregs = ix86_function_regparm (funtype, fundecl);
6137 if (nregs == 0)
6138 return GET_MODE_SIZE (Pmode);
6141 return 0;
6144 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6146 static bool
6147 ix86_legitimate_combined_insn (rtx_insn *insn)
6149 /* Check operand constraints in case hard registers were propagated
6150 into insn pattern. This check prevents combine pass from
6151 generating insn patterns with invalid hard register operands.
6152 These invalid insns can eventually confuse reload to error out
6153 with a spill failure. See also PRs 46829 and 46843. */
6154 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6156 int i;
6158 extract_insn (insn);
6159 preprocess_constraints (insn);
6161 int n_operands = recog_data.n_operands;
6162 int n_alternatives = recog_data.n_alternatives;
6163 for (i = 0; i < n_operands; i++)
6165 rtx op = recog_data.operand[i];
6166 machine_mode mode = GET_MODE (op);
6167 const operand_alternative *op_alt;
6168 int offset = 0;
6169 bool win;
6170 int j;
6172 /* For pre-AVX disallow unaligned loads/stores where the
6173 instructions don't support it. */
6174 if (!TARGET_AVX
6175 && VECTOR_MODE_P (GET_MODE (op))
6176 && misaligned_operand (op, GET_MODE (op)))
6178 int min_align = get_attr_ssememalign (insn);
6179 if (min_align == 0)
6180 return false;
6183 /* A unary operator may be accepted by the predicate, but it
6184 is irrelevant for matching constraints. */
6185 if (UNARY_P (op))
6186 op = XEXP (op, 0);
6188 if (GET_CODE (op) == SUBREG)
6190 if (REG_P (SUBREG_REG (op))
6191 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6192 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6193 GET_MODE (SUBREG_REG (op)),
6194 SUBREG_BYTE (op),
6195 GET_MODE (op));
6196 op = SUBREG_REG (op);
6199 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6200 continue;
6202 op_alt = recog_op_alt;
6204 /* Operand has no constraints, anything is OK. */
6205 win = !n_alternatives;
6207 alternative_mask preferred = get_preferred_alternatives (insn);
6208 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6210 if (!TEST_BIT (preferred, j))
6211 continue;
6212 if (op_alt[i].anything_ok
6213 || (op_alt[i].matches != -1
6214 && operands_match_p
6215 (recog_data.operand[i],
6216 recog_data.operand[op_alt[i].matches]))
6217 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6219 win = true;
6220 break;
6224 if (!win)
6225 return false;
6229 return true;
6232 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6234 static unsigned HOST_WIDE_INT
6235 ix86_asan_shadow_offset (void)
6237 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6238 : HOST_WIDE_INT_C (0x7fff8000))
6239 : (HOST_WIDE_INT_1 << 29);
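/* A hedged note rather than original source: AddressSanitizer computes
   shadow addresses as (addr >> 3) + shadow_offset, so with the LP64 value
   returned above an access to address A is checked through the shadow byte
   at (A >> 3) + 0x7fff8000.  */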
6242 /* Argument support functions. */
6244 /* Return true when register may be used to pass function parameters. */
6245 bool
6246 ix86_function_arg_regno_p (int regno)
6248 int i;
6249 enum calling_abi call_abi;
6250 const int *parm_regs;
6252 if (TARGET_MPX && BND_REGNO_P (regno))
6253 return true;
6255 if (!TARGET_64BIT)
6257 if (TARGET_MACHO)
6258 return (regno < REGPARM_MAX
6259 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6260 else
6261 return (regno < REGPARM_MAX
6262 || (TARGET_MMX && MMX_REGNO_P (regno)
6263 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6264 || (TARGET_SSE && SSE_REGNO_P (regno)
6265 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6268 if (TARGET_SSE && SSE_REGNO_P (regno)
6269 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6270 return true;
6272 /* TODO: The function should depend on the current function's ABI, but
6273 builtins.c would need updating then. Therefore we use the
6274 default ABI. */
6275 call_abi = ix86_cfun_abi ();
6277 /* RAX is used as hidden argument to va_arg functions. */
6278 if (call_abi == SYSV_ABI && regno == AX_REG)
6279 return true;
6281 if (call_abi == MS_ABI)
6282 parm_regs = x86_64_ms_abi_int_parameter_registers;
6283 else
6284 parm_regs = x86_64_int_parameter_registers;
6286 for (i = 0; i < (call_abi == MS_ABI
6287 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6288 if (regno == parm_regs[i])
6289 return true;
6290 return false;
6293 /* Return if we do not know how to pass TYPE solely in registers. */
6295 static bool
6296 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6298 if (must_pass_in_stack_var_size_or_pad (mode, type))
6299 return true;
6301 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6302 The layout_type routine is crafty and tries to trick us into passing
6303 currently unsupported vector types on the stack by using TImode. */
6304 return (!TARGET_64BIT && mode == TImode
6305 && type && TREE_CODE (type) != VECTOR_TYPE);
6308 /* Return the size, in bytes, of the area reserved for arguments passed
6309 in registers for the function represented by FNDECL, depending on the
6310 ABI format used. */
6311 int
6312 ix86_reg_parm_stack_space (const_tree fndecl)
6314 enum calling_abi call_abi = SYSV_ABI;
6315 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6316 call_abi = ix86_function_abi (fndecl);
6317 else
6318 call_abi = ix86_function_type_abi (fndecl);
6319 if (TARGET_64BIT && call_abi == MS_ABI)
6320 return 32;
6321 return 0;
6324 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6325 call ABI used.  */
6326 enum calling_abi
6327 ix86_function_type_abi (const_tree fntype)
6329 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6331 enum calling_abi abi = ix86_abi;
6332 if (abi == SYSV_ABI)
6334 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6336 if (TARGET_X32)
6338 static bool warned = false;
6339 if (!warned)
6341 error ("X32 does not support ms_abi attribute");
6342 warned = true;
6345 abi = MS_ABI;
6348 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6349 abi = SYSV_ABI;
6350 return abi;
6352 return ix86_abi;
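/* Example (informal): in a SysV-ABI translation unit a single function can
   still opt into the Microsoft convention, e.g.

     int f (int, int) __attribute__ ((ms_abi));

   in which case this hook returns MS_ABI for f's type; likewise the
   "sysv_abi" attribute forces SYSV_ABI when ix86_abi is MS_ABI.  */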
6355 /* We add this as a workaround in order to use libc_has_function
6356 hook in i386.md. */
6357 bool
6358 ix86_libc_has_function (enum function_class fn_class)
6360 return targetm.libc_has_function (fn_class);
6363 static bool
6364 ix86_function_ms_hook_prologue (const_tree fn)
6366 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6368 if (decl_function_context (fn) != NULL_TREE)
6369 error_at (DECL_SOURCE_LOCATION (fn),
6370 "ms_hook_prologue is not compatible with nested function");
6371 else
6372 return true;
6374 return false;
6377 static enum calling_abi
6378 ix86_function_abi (const_tree fndecl)
6380 if (! fndecl)
6381 return ix86_abi;
6382 return ix86_function_type_abi (TREE_TYPE (fndecl));
6385 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6386 call ABI used.  */
6387 enum calling_abi
6388 ix86_cfun_abi (void)
6390 if (! cfun)
6391 return ix86_abi;
6392 return cfun->machine->call_abi;
6395 /* Write the extra assembler code needed to declare a function properly. */
6397 void
6398 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6399 tree decl)
6401 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6403 if (is_ms_hook)
6405 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6406 unsigned int filler_cc = 0xcccccccc;
6408 for (i = 0; i < filler_count; i += 4)
6409 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6412 #ifdef SUBTARGET_ASM_UNWIND_INIT
6413 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6414 #endif
6416 ASM_OUTPUT_LABEL (asm_out_file, fname);
6418 /* Output magic byte marker, if hot-patch attribute is set. */
6419 if (is_ms_hook)
6421 if (TARGET_64BIT)
6423 /* leaq [%rsp + 0], %rsp */
6424 asm_fprintf (asm_out_file, ASM_BYTE
6425 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6427 else
6429 /* movl.s %edi, %edi
6430 push %ebp
6431 movl.s %esp, %ebp */
6432 asm_fprintf (asm_out_file, ASM_BYTE
6433 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6438 /* regclass.c */
6439 extern void init_regs (void);
6441 /* Implementation of call abi switching target hook. Specific to FNDECL
6442 the specific call register sets are set. See also
6443 ix86_conditional_register_usage for more details. */
6444 void
6445 ix86_call_abi_override (const_tree fndecl)
6447 if (fndecl == NULL_TREE)
6448 cfun->machine->call_abi = ix86_abi;
6449 else
6450 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6453 /* 64-bit MS and SYSV ABIs have different sets of call-used registers.  Avoid
6454 expensive re-initialization of init_regs each time we switch function context
6455 since this is needed only during RTL expansion. */
6456 static void
6457 ix86_maybe_switch_abi (void)
6459 if (TARGET_64BIT &&
6460 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6461 reinit_regs ();
6464 /* Return 1 if pseudo register should be created and used to hold
6465 GOT address for PIC code. */
6466 bool
6467 ix86_use_pseudo_pic_reg (void)
6469 if ((TARGET_64BIT
6470 && (ix86_cmodel == CM_SMALL_PIC
6471 || TARGET_PECOFF))
6472 || !flag_pic)
6473 return false;
6474 return true;
6477 /* Initialize large model PIC register. */
6479 static void
6480 ix86_init_large_pic_reg (unsigned int tmp_regno)
6482 rtx_code_label *label;
6483 rtx tmp_reg;
6485 gcc_assert (Pmode == DImode);
6486 label = gen_label_rtx ();
6487 emit_label (label);
6488 LABEL_PRESERVE_P (label) = 1;
6489 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6490 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6491 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6492 label));
6493 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6494 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6495 pic_offset_table_rtx, tmp_reg));
6498 /* Create and initialize PIC register if required. */
6499 static void
6500 ix86_init_pic_reg (void)
6502 edge entry_edge;
6503 rtx_insn *seq;
6505 if (!ix86_use_pseudo_pic_reg ())
6506 return;
6508 start_sequence ();
6510 if (TARGET_64BIT)
6512 if (ix86_cmodel == CM_LARGE_PIC)
6513 ix86_init_large_pic_reg (R11_REG);
6514 else
6515 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6517 else
6519 /* If there is a future mcount call in the function, it is more profitable
6520 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
6521 rtx reg = crtl->profile
6522 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6523 : pic_offset_table_rtx;
6524 rtx_insn *insn = emit_insn (gen_set_got (reg));
6525 RTX_FRAME_RELATED_P (insn) = 1;
6526 if (crtl->profile)
6527 emit_move_insn (pic_offset_table_rtx, reg);
6528 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6531 seq = get_insns ();
6532 end_sequence ();
6534 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6535 insert_insn_on_edge (seq, entry_edge);
6536 commit_one_edge_insertion (entry_edge);
6539 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6540 for a call to a function whose data type is FNTYPE.
6541 For a library call, FNTYPE is 0. */
6543 void
6544 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6545 tree fntype, /* tree ptr for function decl */
6546 rtx libname, /* SYMBOL_REF of library name or 0 */
6547 tree fndecl,
6548 int caller)
6550 struct cgraph_local_info *i = NULL;
6551 struct cgraph_node *target = NULL;
6553 memset (cum, 0, sizeof (*cum));
6555 if (fndecl)
6557 target = cgraph_node::get (fndecl);
6558 if (target)
6560 target = target->function_symbol ();
6561 i = cgraph_node::local_info (target->decl);
6562 cum->call_abi = ix86_function_abi (target->decl);
6564 else
6565 cum->call_abi = ix86_function_abi (fndecl);
6567 else
6568 cum->call_abi = ix86_function_type_abi (fntype);
6570 cum->caller = caller;
6572 /* Set up the number of registers to use for passing arguments. */
6573 cum->nregs = ix86_regparm;
6574 if (TARGET_64BIT)
6576 cum->nregs = (cum->call_abi == SYSV_ABI
6577 ? X86_64_REGPARM_MAX
6578 : X86_64_MS_REGPARM_MAX);
6580 if (TARGET_SSE)
6582 cum->sse_nregs = SSE_REGPARM_MAX;
6583 if (TARGET_64BIT)
6585 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6586 ? X86_64_SSE_REGPARM_MAX
6587 : X86_64_MS_SSE_REGPARM_MAX);
6590 if (TARGET_MMX)
6591 cum->mmx_nregs = MMX_REGPARM_MAX;
6592 cum->warn_avx512f = true;
6593 cum->warn_avx = true;
6594 cum->warn_sse = true;
6595 cum->warn_mmx = true;
6597 /* Because the type might mismatch between caller and callee, we need to
6598 use the actual type of the function for local calls.
6599 FIXME: cgraph_analyze can be told to actually record if a function uses
6600 va_start, so for local functions maybe_vaarg can be made aggressive,
6601 helping K&R code.
6602 FIXME: once the type system is fixed, we won't need this code anymore. */
6603 if (i && i->local && i->can_change_signature)
6604 fntype = TREE_TYPE (target->decl);
6605 cum->stdarg = stdarg_p (fntype);
6606 cum->maybe_vaarg = (fntype
6607 ? (!prototype_p (fntype) || stdarg_p (fntype))
6608 : !libname);
6610 cum->bnd_regno = FIRST_BND_REG;
6611 cum->bnds_in_bt = 0;
6612 cum->force_bnd_pass = 0;
6613 cum->decl = fndecl;
6615 if (!TARGET_64BIT)
6617 /* If there are variable arguments, then we won't pass anything
6618 in registers in 32-bit mode. */
6619 if (stdarg_p (fntype))
6621 cum->nregs = 0;
6622 cum->sse_nregs = 0;
6623 cum->mmx_nregs = 0;
6624 cum->warn_avx512f = false;
6625 cum->warn_avx = false;
6626 cum->warn_sse = false;
6627 cum->warn_mmx = false;
6628 return;
6631 /* Use ecx and edx registers if function has fastcall attribute,
6632 else look for regparm information. */
6633 if (fntype)
6635 unsigned int ccvt = ix86_get_callcvt (fntype);
6636 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6638 cum->nregs = 1;
6639 cum->fastcall = 1; /* Same first register as in fastcall. */
6641 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6643 cum->nregs = 2;
6644 cum->fastcall = 1;
6646 else
6647 cum->nregs = ix86_function_regparm (fntype, fndecl);
6650 /* Set up the number of SSE registers used for passing SFmode
6651 and DFmode arguments. Warn for mismatching ABI. */
6652 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
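/* Illustration of the 32-bit conventions set up above (informal):

     __attribute__ ((fastcall)) int f (int a, int b, int c);
       // a in %ecx, b in %edx, c on the stack (cum->nregs == 2)
     __attribute__ ((thiscall)) int g (int a, int b);
       // a in %ecx only (cum->nregs == 1)
     __attribute__ ((regparm (3))) int h (int a, int b, int c);
       // a in %eax, b in %edx, c in %ecx

   Variadic 32-bit functions get cum->nregs == 0 and pass everything on
   the stack.  */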
6656 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6657 But in the case of vector types, it is some vector mode.
6659 When we have only some of our vector isa extensions enabled, then there
6660 are some modes for which vector_mode_supported_p is false. For these
6661 modes, the generic vector support in gcc will choose some non-vector mode
6662 in order to implement the type. By computing the natural mode, we'll
6663 select the proper ABI location for the operand and not depend on whatever
6664 the middle-end decides to do with these vector types.
6666 The middle-end can't deal with vector types > 16 bytes. In this
6667 case, we return the original mode and warn about the ABI change if CUM
6668 isn't NULL.
6670 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6671 available for the function return value. */
6673 static machine_mode
6674 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6675 bool in_return)
6677 machine_mode mode = TYPE_MODE (type);
6679 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6681 HOST_WIDE_INT size = int_size_in_bytes (type);
6682 if ((size == 8 || size == 16 || size == 32 || size == 64)
6683 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6684 && TYPE_VECTOR_SUBPARTS (type) > 1)
6686 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6688 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6689 mode = MIN_MODE_VECTOR_FLOAT;
6690 else
6691 mode = MIN_MODE_VECTOR_INT;
6693 /* Get the mode which has this inner mode and number of units. */
6694 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6695 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6696 && GET_MODE_INNER (mode) == innermode)
6698 if (size == 64 && !TARGET_AVX512F)
6700 static bool warnedavx512f;
6701 static bool warnedavx512f_ret;
6703 if (cum && cum->warn_avx512f && !warnedavx512f)
6705 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6706 "without AVX512F enabled changes the ABI"))
6707 warnedavx512f = true;
6709 else if (in_return && !warnedavx512f_ret)
6711 if (warning (OPT_Wpsabi, "AVX512F vector return "
6712 "without AVX512F enabled changes the ABI"))
6713 warnedavx512f_ret = true;
6716 return TYPE_MODE (type);
6718 else if (size == 32 && !TARGET_AVX)
6720 static bool warnedavx;
6721 static bool warnedavx_ret;
6723 if (cum && cum->warn_avx && !warnedavx)
6725 if (warning (OPT_Wpsabi, "AVX vector argument "
6726 "without AVX enabled changes the ABI"))
6727 warnedavx = true;
6729 else if (in_return && !warnedavx_ret)
6731 if (warning (OPT_Wpsabi, "AVX vector return "
6732 "without AVX enabled changes the ABI"))
6733 warnedavx_ret = true;
6736 return TYPE_MODE (type);
6738 else if (((size == 8 && TARGET_64BIT) || size == 16)
6739 && !TARGET_SSE)
6741 static bool warnedsse;
6742 static bool warnedsse_ret;
6744 if (cum && cum->warn_sse && !warnedsse)
6746 if (warning (OPT_Wpsabi, "SSE vector argument "
6747 "without SSE enabled changes the ABI"))
6748 warnedsse = true;
6750 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6752 if (warning (OPT_Wpsabi, "SSE vector return "
6753 "without SSE enabled changes the ABI"))
6754 warnedsse_ret = true;
6757 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6759 static bool warnedmmx;
6760 static bool warnedmmx_ret;
6762 if (cum && cum->warn_mmx && !warnedmmx)
6764 if (warning (OPT_Wpsabi, "MMX vector argument "
6765 "without MMX enabled changes the ABI"))
6766 warnedmmx = true;
6768 else if (in_return && !warnedmmx_ret)
6770 if (warning (OPT_Wpsabi, "MMX vector return "
6771 "without MMX enabled changes the ABI"))
6772 warnedmmx_ret = true;
6775 return mode;
6778 gcc_unreachable ();
6782 return mode;
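/* Example of the warning path above (informal sketch): compiling

     typedef int v8si __attribute__ ((vector_size (32)));
     v8si f (v8si x) { return x; }

   without -mavx leaves V8SImode unsupported, so the 32-byte case fires and
   a -Wpsabi warning notes that enabling AVX would change how X is passed
   and returned.  */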
6785 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6786 this may not agree with the mode that the type system has chosen for the
6787 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6788 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6790 static rtx
6791 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6792 unsigned int regno)
6794 rtx tmp;
6796 if (orig_mode != BLKmode)
6797 tmp = gen_rtx_REG (orig_mode, regno);
6798 else
6800 tmp = gen_rtx_REG (mode, regno);
6801 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6802 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6805 return tmp;
6808 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6809 of this code is to classify each 8bytes of incoming argument by the register
6810 class and assign registers accordingly. */
6812 /* Return the union class of CLASS1 and CLASS2.
6813 See the x86-64 PS ABI for details. */
6815 static enum x86_64_reg_class
6816 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6818 /* Rule #1: If both classes are equal, this is the resulting class. */
6819 if (class1 == class2)
6820 return class1;
6822 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6823 the other class. */
6824 if (class1 == X86_64_NO_CLASS)
6825 return class2;
6826 if (class2 == X86_64_NO_CLASS)
6827 return class1;
6829 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6830 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6831 return X86_64_MEMORY_CLASS;
6833 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6834 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6835 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6836 return X86_64_INTEGERSI_CLASS;
6837 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6838 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6839 return X86_64_INTEGER_CLASS;
6841 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6842 MEMORY is used. */
6843 if (class1 == X86_64_X87_CLASS
6844 || class1 == X86_64_X87UP_CLASS
6845 || class1 == X86_64_COMPLEX_X87_CLASS
6846 || class2 == X86_64_X87_CLASS
6847 || class2 == X86_64_X87UP_CLASS
6848 || class2 == X86_64_COMPLEX_X87_CLASS)
6849 return X86_64_MEMORY_CLASS;
6851 /* Rule #6: Otherwise class SSE is used. */
6852 return X86_64_SSE_CLASS;
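/* Worked examples of the rules above (informal):

     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)
       == X86_64_INTEGER_CLASS          (rule #4)
     merge_classes (X86_64_SSESF_CLASS, X86_64_SSEDF_CLASS)
       == X86_64_SSE_CLASS              (rule #6)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       == X86_64_MEMORY_CLASS           (rule #5)  */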
6855 /* Classify the argument of type TYPE and mode MODE.
6856 CLASSES will be filled by the register class used to pass each word
6857 of the operand. The number of words is returned. In case the parameter
6858 should be passed in memory, 0 is returned. As a special case for zero
6859 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6861 BIT_OFFSET is used internally for handling records; it specifies the
6862 offset in bits modulo 512 to avoid overflow cases.
6864 See the x86-64 PS ABI for details.
6867 static int
6868 classify_argument (machine_mode mode, const_tree type,
6869 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6871 HOST_WIDE_INT bytes =
6872 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6873 int words
6874 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6876 /* Variable sized entities are always passed/returned in memory. */
6877 if (bytes < 0)
6878 return 0;
6880 if (mode != VOIDmode
6881 && targetm.calls.must_pass_in_stack (mode, type))
6882 return 0;
6884 if (type && AGGREGATE_TYPE_P (type))
6886 int i;
6887 tree field;
6888 enum x86_64_reg_class subclasses[MAX_CLASSES];
6890 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6891 if (bytes > 64)
6892 return 0;
6894 for (i = 0; i < words; i++)
6895 classes[i] = X86_64_NO_CLASS;
6897 /* Zero-sized arrays or structures are NO_CLASS. Since returning 0 would
6898 signal the memory class, handle them as a special case. */
6899 if (!words)
6901 classes[0] = X86_64_NO_CLASS;
6902 return 1;
6905 /* Classify each field of record and merge classes. */
6906 switch (TREE_CODE (type))
6908 case RECORD_TYPE:
6909 /* And now merge the fields of structure. */
6910 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6912 if (TREE_CODE (field) == FIELD_DECL)
6914 int num;
6916 if (TREE_TYPE (field) == error_mark_node)
6917 continue;
6919 /* Bitfields are always classified as integer. Handle them
6920 early, since later code would consider them to be
6921 misaligned integers. */
6922 if (DECL_BIT_FIELD (field))
6924 for (i = (int_bit_position (field)
6925 + (bit_offset % 64)) / 8 / 8;
6926 i < ((int_bit_position (field) + (bit_offset % 64))
6927 + tree_to_shwi (DECL_SIZE (field))
6928 + 63) / 8 / 8; i++)
6929 classes[i] =
6930 merge_classes (X86_64_INTEGER_CLASS,
6931 classes[i]);
6933 else
6935 int pos;
6937 type = TREE_TYPE (field);
6939 /* Flexible array member is ignored. */
6940 if (TYPE_MODE (type) == BLKmode
6941 && TREE_CODE (type) == ARRAY_TYPE
6942 && TYPE_SIZE (type) == NULL_TREE
6943 && TYPE_DOMAIN (type) != NULL_TREE
6944 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6945 == NULL_TREE))
6947 static bool warned;
6949 if (!warned && warn_psabi)
6951 warned = true;
6952 inform (input_location,
6953 "the ABI of passing struct with"
6954 " a flexible array member has"
6955 " changed in GCC 4.4");
6957 continue;
6959 num = classify_argument (TYPE_MODE (type), type,
6960 subclasses,
6961 (int_bit_position (field)
6962 + bit_offset) % 512);
6963 if (!num)
6964 return 0;
6965 pos = (int_bit_position (field)
6966 + (bit_offset % 64)) / 8 / 8;
6967 for (i = 0; i < num && (i + pos) < words; i++)
6968 classes[i + pos] =
6969 merge_classes (subclasses[i], classes[i + pos]);
6973 break;
6975 case ARRAY_TYPE:
6976 /* Arrays are handled as small records. */
6978 int num;
6979 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6980 TREE_TYPE (type), subclasses, bit_offset);
6981 if (!num)
6982 return 0;
6984 /* The partial classes are now full classes. */
6985 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6986 subclasses[0] = X86_64_SSE_CLASS;
6987 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6988 && !((bit_offset % 64) == 0 && bytes == 4))
6989 subclasses[0] = X86_64_INTEGER_CLASS;
6991 for (i = 0; i < words; i++)
6992 classes[i] = subclasses[i % num];
6994 break;
6996 case UNION_TYPE:
6997 case QUAL_UNION_TYPE:
6998 /* Unions are similar to RECORD_TYPE but offset is always 0.
7000 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7002 if (TREE_CODE (field) == FIELD_DECL)
7004 int num;
7006 if (TREE_TYPE (field) == error_mark_node)
7007 continue;
7009 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7010 TREE_TYPE (field), subclasses,
7011 bit_offset);
7012 if (!num)
7013 return 0;
7014 for (i = 0; i < num && i < words; i++)
7015 classes[i] = merge_classes (subclasses[i], classes[i]);
7018 break;
7020 default:
7021 gcc_unreachable ();
7024 if (words > 2)
7026 /* When size > 16 bytes, if the first one isn't
7027 X86_64_SSE_CLASS or any other ones aren't
7028 X86_64_SSEUP_CLASS, everything should be passed in
7029 memory. */
7030 if (classes[0] != X86_64_SSE_CLASS)
7031 return 0;
7033 for (i = 1; i < words; i++)
7034 if (classes[i] != X86_64_SSEUP_CLASS)
7035 return 0;
7038 /* Final merger cleanup. */
7039 for (i = 0; i < words; i++)
7041 /* If one class is MEMORY, everything should be passed in
7042 memory. */
7043 if (classes[i] == X86_64_MEMORY_CLASS)
7044 return 0;
7046 /* X86_64_SSEUP_CLASS should always be preceded by
7047 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7048 if (classes[i] == X86_64_SSEUP_CLASS
7049 && classes[i - 1] != X86_64_SSE_CLASS
7050 && classes[i - 1] != X86_64_SSEUP_CLASS)
7052 /* The first one should never be X86_64_SSEUP_CLASS. */
7053 gcc_assert (i != 0);
7054 classes[i] = X86_64_SSE_CLASS;
7057 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7058 everything should be passed in memory. */
7059 if (classes[i] == X86_64_X87UP_CLASS
7060 && (classes[i - 1] != X86_64_X87_CLASS))
7062 static bool warned;
7064 /* The first one should never be X86_64_X87UP_CLASS. */
7065 gcc_assert (i != 0);
7066 if (!warned && warn_psabi)
7068 warned = true;
7069 inform (input_location,
7070 "the ABI of passing union with long double"
7071 " has changed in GCC 4.4");
7073 return 0;
7076 return words;
7079 /* Compute the alignment needed. We align all types to their natural
7080 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
7081 if (mode != VOIDmode && mode != BLKmode)
7083 int mode_alignment = GET_MODE_BITSIZE (mode);
7085 if (mode == XFmode)
7086 mode_alignment = 128;
7087 else if (mode == XCmode)
7088 mode_alignment = 256;
7089 if (COMPLEX_MODE_P (mode))
7090 mode_alignment /= 2;
7091 /* Misaligned fields are always returned in memory. */
7092 if (bit_offset % mode_alignment)
7093 return 0;
7096 /* for V1xx modes, just use the base mode */
7097 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7098 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
7099 mode = GET_MODE_INNER (mode);
7101 /* Classification of atomic types. */
7102 switch (mode)
7104 case SDmode:
7105 case DDmode:
7106 classes[0] = X86_64_SSE_CLASS;
7107 return 1;
7108 case TDmode:
7109 classes[0] = X86_64_SSE_CLASS;
7110 classes[1] = X86_64_SSEUP_CLASS;
7111 return 2;
7112 case DImode:
7113 case SImode:
7114 case HImode:
7115 case QImode:
7116 case CSImode:
7117 case CHImode:
7118 case CQImode:
7120 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7122 /* Analyze last 128 bits only. */
7123 size = (size - 1) & 0x7f;
7125 if (size < 32)
7127 classes[0] = X86_64_INTEGERSI_CLASS;
7128 return 1;
7130 else if (size < 64)
7132 classes[0] = X86_64_INTEGER_CLASS;
7133 return 1;
7135 else if (size < 64+32)
7137 classes[0] = X86_64_INTEGER_CLASS;
7138 classes[1] = X86_64_INTEGERSI_CLASS;
7139 return 2;
7141 else if (size < 64+64)
7143 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7144 return 2;
7146 else
7147 gcc_unreachable ();
7149 case CDImode:
7150 case TImode:
7151 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7152 return 2;
7153 case COImode:
7154 case OImode:
7155 /* OImode shouldn't be used directly. */
7156 gcc_unreachable ();
7157 case CTImode:
7158 return 0;
7159 case SFmode:
7160 if (!(bit_offset % 64))
7161 classes[0] = X86_64_SSESF_CLASS;
7162 else
7163 classes[0] = X86_64_SSE_CLASS;
7164 return 1;
7165 case DFmode:
7166 classes[0] = X86_64_SSEDF_CLASS;
7167 return 1;
7168 case XFmode:
7169 classes[0] = X86_64_X87_CLASS;
7170 classes[1] = X86_64_X87UP_CLASS;
7171 return 2;
7172 case TFmode:
7173 classes[0] = X86_64_SSE_CLASS;
7174 classes[1] = X86_64_SSEUP_CLASS;
7175 return 2;
7176 case SCmode:
7177 classes[0] = X86_64_SSE_CLASS;
7178 if (!(bit_offset % 64))
7179 return 1;
7180 else
7182 static bool warned;
7184 if (!warned && warn_psabi)
7186 warned = true;
7187 inform (input_location,
7188 "the ABI of passing structure with complex float"
7189 " member has changed in GCC 4.4");
7191 classes[1] = X86_64_SSESF_CLASS;
7192 return 2;
7194 case DCmode:
7195 classes[0] = X86_64_SSEDF_CLASS;
7196 classes[1] = X86_64_SSEDF_CLASS;
7197 return 2;
7198 case XCmode:
7199 classes[0] = X86_64_COMPLEX_X87_CLASS;
7200 return 1;
7201 case TCmode:
7202 /* This mode is larger than 16 bytes. */
7203 return 0;
7204 case V8SFmode:
7205 case V8SImode:
7206 case V32QImode:
7207 case V16HImode:
7208 case V4DFmode:
7209 case V4DImode:
7210 classes[0] = X86_64_SSE_CLASS;
7211 classes[1] = X86_64_SSEUP_CLASS;
7212 classes[2] = X86_64_SSEUP_CLASS;
7213 classes[3] = X86_64_SSEUP_CLASS;
7214 return 4;
7215 case V8DFmode:
7216 case V16SFmode:
7217 case V8DImode:
7218 case V16SImode:
7219 case V32HImode:
7220 case V64QImode:
7221 classes[0] = X86_64_SSE_CLASS;
7222 classes[1] = X86_64_SSEUP_CLASS;
7223 classes[2] = X86_64_SSEUP_CLASS;
7224 classes[3] = X86_64_SSEUP_CLASS;
7225 classes[4] = X86_64_SSEUP_CLASS;
7226 classes[5] = X86_64_SSEUP_CLASS;
7227 classes[6] = X86_64_SSEUP_CLASS;
7228 classes[7] = X86_64_SSEUP_CLASS;
7229 return 8;
7230 case V4SFmode:
7231 case V4SImode:
7232 case V16QImode:
7233 case V8HImode:
7234 case V2DFmode:
7235 case V2DImode:
7236 classes[0] = X86_64_SSE_CLASS;
7237 classes[1] = X86_64_SSEUP_CLASS;
7238 return 2;
7239 case V1TImode:
7240 case V1DImode:
7241 case V2SFmode:
7242 case V2SImode:
7243 case V4HImode:
7244 case V8QImode:
7245 classes[0] = X86_64_SSE_CLASS;
7246 return 1;
7247 case BLKmode:
7248 case VOIDmode:
7249 return 0;
7250 default:
7251 gcc_assert (VECTOR_MODE_P (mode));
7253 if (bytes > 16)
7254 return 0;
7256 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7258 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7259 classes[0] = X86_64_INTEGERSI_CLASS;
7260 else
7261 classes[0] = X86_64_INTEGER_CLASS;
7262 classes[1] = X86_64_INTEGER_CLASS;
7263 return 1 + (bytes > 8);
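/* Worked example (informal): classifying

     struct s { double d; long l; };   -- 16 bytes, two eightbytes

   yields 2 with classes[0] == X86_64_SSEDF_CLASS (the double) and
   classes[1] == X86_64_INTEGER_CLASS (the long), so when passed as the
   first argument the struct is split between %xmm0 and %rdi.
   A 32-byte struct whose first eightbyte is not SSE, by contrast, falls
   out through the words > 2 check above and is passed in memory.  */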
7267 /* Examine the argument and return set number of register required in each
7268 class. Return true iff parameter should be passed in memory. */
7270 static bool
7271 examine_argument (machine_mode mode, const_tree type, int in_return,
7272 int *int_nregs, int *sse_nregs)
7274 enum x86_64_reg_class regclass[MAX_CLASSES];
7275 int n = classify_argument (mode, type, regclass, 0);
7277 *int_nregs = 0;
7278 *sse_nregs = 0;
7280 if (!n)
7281 return true;
7282 for (n--; n >= 0; n--)
7283 switch (regclass[n])
7285 case X86_64_INTEGER_CLASS:
7286 case X86_64_INTEGERSI_CLASS:
7287 (*int_nregs)++;
7288 break;
7289 case X86_64_SSE_CLASS:
7290 case X86_64_SSESF_CLASS:
7291 case X86_64_SSEDF_CLASS:
7292 (*sse_nregs)++;
7293 break;
7294 case X86_64_NO_CLASS:
7295 case X86_64_SSEUP_CLASS:
7296 break;
7297 case X86_64_X87_CLASS:
7298 case X86_64_X87UP_CLASS:
7299 case X86_64_COMPLEX_X87_CLASS:
7300 if (!in_return)
7301 return true;
7302 break;
7303 case X86_64_MEMORY_CLASS:
7304 gcc_unreachable ();
7307 return false;
7310 /* Construct container for the argument used by GCC interface. See
7311 FUNCTION_ARG for the detailed description. */
7313 static rtx
7314 construct_container (machine_mode mode, machine_mode orig_mode,
7315 const_tree type, int in_return, int nintregs, int nsseregs,
7316 const int *intreg, int sse_regno)
7318 /* The following variables hold the static issued_error state. */
7319 static bool issued_sse_arg_error;
7320 static bool issued_sse_ret_error;
7321 static bool issued_x87_ret_error;
7323 machine_mode tmpmode;
7324 int bytes =
7325 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7326 enum x86_64_reg_class regclass[MAX_CLASSES];
7327 int n;
7328 int i;
7329 int nexps = 0;
7330 int needed_sseregs, needed_intregs;
7331 rtx exp[MAX_CLASSES];
7332 rtx ret;
7334 n = classify_argument (mode, type, regclass, 0);
7335 if (!n)
7336 return NULL;
7337 if (examine_argument (mode, type, in_return, &needed_intregs,
7338 &needed_sseregs))
7339 return NULL;
7340 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7341 return NULL;
7343 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7344 some less clueful developer tries to use floating-point anyway. */
7345 if (needed_sseregs && !TARGET_SSE)
7347 if (in_return)
7349 if (!issued_sse_ret_error)
7351 error ("SSE register return with SSE disabled");
7352 issued_sse_ret_error = true;
7355 else if (!issued_sse_arg_error)
7357 error ("SSE register argument with SSE disabled");
7358 issued_sse_arg_error = true;
7360 return NULL;
7363 /* Likewise, error if the ABI requires us to return values in the
7364 x87 registers and the user specified -mno-80387. */
7365 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7366 for (i = 0; i < n; i++)
7367 if (regclass[i] == X86_64_X87_CLASS
7368 || regclass[i] == X86_64_X87UP_CLASS
7369 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7371 if (!issued_x87_ret_error)
7373 error ("x87 register return with x87 disabled");
7374 issued_x87_ret_error = true;
7376 return NULL;
7379 /* First construct simple cases. Avoid SCmode, since we want to use
7380 single register to pass this type. */
7381 if (n == 1 && mode != SCmode)
7382 switch (regclass[0])
7384 case X86_64_INTEGER_CLASS:
7385 case X86_64_INTEGERSI_CLASS:
7386 return gen_rtx_REG (mode, intreg[0]);
7387 case X86_64_SSE_CLASS:
7388 case X86_64_SSESF_CLASS:
7389 case X86_64_SSEDF_CLASS:
7390 if (mode != BLKmode)
7391 return gen_reg_or_parallel (mode, orig_mode,
7392 SSE_REGNO (sse_regno));
7393 break;
7394 case X86_64_X87_CLASS:
7395 case X86_64_COMPLEX_X87_CLASS:
7396 return gen_rtx_REG (mode, FIRST_STACK_REG);
7397 case X86_64_NO_CLASS:
7398 /* Zero sized array, struct or class. */
7399 return NULL;
7400 default:
7401 gcc_unreachable ();
7403 if (n == 2
7404 && regclass[0] == X86_64_SSE_CLASS
7405 && regclass[1] == X86_64_SSEUP_CLASS
7406 && mode != BLKmode)
7407 return gen_reg_or_parallel (mode, orig_mode,
7408 SSE_REGNO (sse_regno));
7409 if (n == 4
7410 && regclass[0] == X86_64_SSE_CLASS
7411 && regclass[1] == X86_64_SSEUP_CLASS
7412 && regclass[2] == X86_64_SSEUP_CLASS
7413 && regclass[3] == X86_64_SSEUP_CLASS
7414 && mode != BLKmode)
7415 return gen_reg_or_parallel (mode, orig_mode,
7416 SSE_REGNO (sse_regno));
7417 if (n == 8
7418 && regclass[0] == X86_64_SSE_CLASS
7419 && regclass[1] == X86_64_SSEUP_CLASS
7420 && regclass[2] == X86_64_SSEUP_CLASS
7421 && regclass[3] == X86_64_SSEUP_CLASS
7422 && regclass[4] == X86_64_SSEUP_CLASS
7423 && regclass[5] == X86_64_SSEUP_CLASS
7424 && regclass[6] == X86_64_SSEUP_CLASS
7425 && regclass[7] == X86_64_SSEUP_CLASS
7426 && mode != BLKmode)
7427 return gen_reg_or_parallel (mode, orig_mode,
7428 SSE_REGNO (sse_regno));
7429 if (n == 2
7430 && regclass[0] == X86_64_X87_CLASS
7431 && regclass[1] == X86_64_X87UP_CLASS)
7432 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7434 if (n == 2
7435 && regclass[0] == X86_64_INTEGER_CLASS
7436 && regclass[1] == X86_64_INTEGER_CLASS
7437 && (mode == CDImode || mode == TImode)
7438 && intreg[0] + 1 == intreg[1])
7439 return gen_rtx_REG (mode, intreg[0]);
7441 /* Otherwise figure out the entries of the PARALLEL. */
7442 for (i = 0; i < n; i++)
7444 int pos;
7446 switch (regclass[i])
7448 case X86_64_NO_CLASS:
7449 break;
7450 case X86_64_INTEGER_CLASS:
7451 case X86_64_INTEGERSI_CLASS:
7452 /* Merge TImodes on aligned occasions here too. */
7453 if (i * 8 + 8 > bytes)
7454 tmpmode
7455 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7456 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7457 tmpmode = SImode;
7458 else
7459 tmpmode = DImode;
7460 /* We've requested 24 bytes for which we
7461 don't have a mode. Use DImode. */
7462 if (tmpmode == BLKmode)
7463 tmpmode = DImode;
7464 exp [nexps++]
7465 = gen_rtx_EXPR_LIST (VOIDmode,
7466 gen_rtx_REG (tmpmode, *intreg),
7467 GEN_INT (i*8));
7468 intreg++;
7469 break;
7470 case X86_64_SSESF_CLASS:
7471 exp [nexps++]
7472 = gen_rtx_EXPR_LIST (VOIDmode,
7473 gen_rtx_REG (SFmode,
7474 SSE_REGNO (sse_regno)),
7475 GEN_INT (i*8));
7476 sse_regno++;
7477 break;
7478 case X86_64_SSEDF_CLASS:
7479 exp [nexps++]
7480 = gen_rtx_EXPR_LIST (VOIDmode,
7481 gen_rtx_REG (DFmode,
7482 SSE_REGNO (sse_regno)),
7483 GEN_INT (i*8));
7484 sse_regno++;
7485 break;
7486 case X86_64_SSE_CLASS:
7487 pos = i;
7488 switch (n)
7490 case 1:
7491 tmpmode = DImode;
7492 break;
7493 case 2:
7494 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7496 tmpmode = TImode;
7497 i++;
7499 else
7500 tmpmode = DImode;
7501 break;
7502 case 4:
7503 gcc_assert (i == 0
7504 && regclass[1] == X86_64_SSEUP_CLASS
7505 && regclass[2] == X86_64_SSEUP_CLASS
7506 && regclass[3] == X86_64_SSEUP_CLASS);
7507 tmpmode = OImode;
7508 i += 3;
7509 break;
7510 case 8:
7511 gcc_assert (i == 0
7512 && regclass[1] == X86_64_SSEUP_CLASS
7513 && regclass[2] == X86_64_SSEUP_CLASS
7514 && regclass[3] == X86_64_SSEUP_CLASS
7515 && regclass[4] == X86_64_SSEUP_CLASS
7516 && regclass[5] == X86_64_SSEUP_CLASS
7517 && regclass[6] == X86_64_SSEUP_CLASS
7518 && regclass[7] == X86_64_SSEUP_CLASS);
7519 tmpmode = XImode;
7520 i += 7;
7521 break;
7522 default:
7523 gcc_unreachable ();
7525 exp [nexps++]
7526 = gen_rtx_EXPR_LIST (VOIDmode,
7527 gen_rtx_REG (tmpmode,
7528 SSE_REGNO (sse_regno)),
7529 GEN_INT (pos*8));
7530 sse_regno++;
7531 break;
7532 default:
7533 gcc_unreachable ();
7537 /* Empty aligned struct, union or class. */
7538 if (nexps == 0)
7539 return NULL;
7541 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7542 for (i = 0; i < nexps; i++)
7543 XVECEXP (ret, 0, i) = exp [i];
7544 return ret;
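/* Sketch of the result for the struct s example used above (informal):
   the PARALLEL built here looks roughly like

     (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                    (expr_list (reg:DI di)   (const_int 8))])

   i.e. one EXPR_LIST per eightbyte, each giving the register and the byte
   offset of that piece within the argument.  */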
7547 /* Update the data in CUM to advance over an argument of mode MODE
7548 and data type TYPE. (TYPE is null for libcalls where that information
7549 may not be available.)
7551 Return the number of integer registers advanced over. */
7553 static int
7554 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7555 const_tree type, HOST_WIDE_INT bytes,
7556 HOST_WIDE_INT words)
7558 int res = 0;
7559 bool error_p = false;
7561 if (TARGET_IAMCU)
7563 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7564 bytes in registers. */
7565 if (bytes <= 8)
7566 goto pass_in_reg;
7567 return res;
7570 switch (mode)
7572 default:
7573 break;
7575 case BLKmode:
7576 if (bytes < 0)
7577 break;
7578 /* FALLTHRU */
7580 case DImode:
7581 case SImode:
7582 case HImode:
7583 case QImode:
7584 pass_in_reg:
7585 cum->words += words;
7586 cum->nregs -= words;
7587 cum->regno += words;
7588 if (cum->nregs >= 0)
7589 res = words;
7590 if (cum->nregs <= 0)
7592 cum->nregs = 0;
7593 cum->regno = 0;
7595 break;
7597 case OImode:
7598 /* OImode shouldn't be used directly. */
7599 gcc_unreachable ();
7601 case DFmode:
7602 if (cum->float_in_sse == -1)
7603 error_p = 1;
7604 if (cum->float_in_sse < 2)
7605 break;
7606 case SFmode:
7607 if (cum->float_in_sse == -1)
7608 error_p = 1;
7609 if (cum->float_in_sse < 1)
7610 break;
7611 /* FALLTHRU */
7613 case V8SFmode:
7614 case V8SImode:
7615 case V64QImode:
7616 case V32HImode:
7617 case V16SImode:
7618 case V8DImode:
7619 case V16SFmode:
7620 case V8DFmode:
7621 case V32QImode:
7622 case V16HImode:
7623 case V4DFmode:
7624 case V4DImode:
7625 case TImode:
7626 case V16QImode:
7627 case V8HImode:
7628 case V4SImode:
7629 case V2DImode:
7630 case V4SFmode:
7631 case V2DFmode:
7632 if (!type || !AGGREGATE_TYPE_P (type))
7634 cum->sse_words += words;
7635 cum->sse_nregs -= 1;
7636 cum->sse_regno += 1;
7637 if (cum->sse_nregs <= 0)
7639 cum->sse_nregs = 0;
7640 cum->sse_regno = 0;
7643 break;
7645 case V8QImode:
7646 case V4HImode:
7647 case V2SImode:
7648 case V2SFmode:
7649 case V1TImode:
7650 case V1DImode:
7651 if (!type || !AGGREGATE_TYPE_P (type))
7653 cum->mmx_words += words;
7654 cum->mmx_nregs -= 1;
7655 cum->mmx_regno += 1;
7656 if (cum->mmx_nregs <= 0)
7658 cum->mmx_nregs = 0;
7659 cum->mmx_regno = 0;
7662 break;
7664 if (error_p)
7666 cum->float_in_sse = 0;
7667 error ("calling %qD with SSE calling convention without "
7668 "SSE/SSE2 enabled", cum->decl);
7669 sorry ("this is a GCC bug that can be worked around by adding "
7670 "attribute used to function called");
7673 return res;
7676 static int
7677 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7678 const_tree type, HOST_WIDE_INT words, bool named)
7680 int int_nregs, sse_nregs;
7682 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7683 if (!named && (VALID_AVX512F_REG_MODE (mode)
7684 || VALID_AVX256_REG_MODE (mode)))
7685 return 0;
7687 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7688 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7690 cum->nregs -= int_nregs;
7691 cum->sse_nregs -= sse_nregs;
7692 cum->regno += int_nregs;
7693 cum->sse_regno += sse_nregs;
7694 return int_nregs;
7696 else
7698 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7699 cum->words = (cum->words + align - 1) & ~(align - 1);
7700 cum->words += words;
7701 return 0;
7705 static int
7706 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7707 HOST_WIDE_INT words)
7709 /* Otherwise, this should be passed indirect. */
7710 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7712 cum->words += words;
7713 if (cum->nregs > 0)
7715 cum->nregs -= 1;
7716 cum->regno += 1;
7717 return 1;
7719 return 0;
7722 /* Update the data in CUM to advance over an argument of mode MODE and
7723 data type TYPE. (TYPE is null for libcalls where that information
7724 may not be available.) */
7726 static void
7727 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7728 const_tree type, bool named)
7730 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7731 HOST_WIDE_INT bytes, words;
7732 int nregs;
7734 if (mode == BLKmode)
7735 bytes = int_size_in_bytes (type);
7736 else
7737 bytes = GET_MODE_SIZE (mode);
7738 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7740 if (type)
7741 mode = type_natural_mode (type, NULL, false);
7743 if ((type && POINTER_BOUNDS_TYPE_P (type))
7744 || POINTER_BOUNDS_MODE_P (mode))
7746 /* If we pass bounds in BT then just update the remaining bounds count. */
7747 if (cum->bnds_in_bt)
7749 cum->bnds_in_bt--;
7750 return;
7753 /* Update the remaining number of bounds to force. */
7754 if (cum->force_bnd_pass)
7755 cum->force_bnd_pass--;
7757 cum->bnd_regno++;
7759 return;
7762 /* The first arg not going to Bounds Tables resets this counter. */
7763 cum->bnds_in_bt = 0;
7764 /* For unnamed args we always pass bounds to avoid bounds mess when
7765 passed and received types do not match. If bounds do not follow
7766 unnamed arg, still pretend required number of bounds were passed. */
7767 if (cum->force_bnd_pass)
7769 cum->bnd_regno += cum->force_bnd_pass;
7770 cum->force_bnd_pass = 0;
7773 if (TARGET_64BIT)
7775 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7777 if (call_abi == MS_ABI)
7778 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7779 else
7780 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7782 else
7783 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7785 /* For stdarg we expect bounds to be passed for each value passed
7786 in register. */
7787 if (cum->stdarg)
7788 cum->force_bnd_pass = nregs;
7789 /* For pointers passed in memory we expect bounds passed in Bounds
7790 Table. */
7791 if (!nregs)
7792 cum->bnds_in_bt = chkp_type_bounds_count (type);
7795 /* Define where to put the arguments to a function.
7796 Value is zero to push the argument on the stack,
7797 or a hard register in which to store the argument.
7799 MODE is the argument's machine mode.
7800 TYPE is the data type of the argument (as a tree).
7801 This is null for libcalls where that information may
7802 not be available.
7803 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7804 the preceding args and about the function being called.
7805 NAMED is nonzero if this argument is a named parameter
7806 (otherwise it is an extra parameter matching an ellipsis). */
7808 static rtx
7809 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7810 machine_mode orig_mode, const_tree type,
7811 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7813 bool error_p = false;
7814 /* Avoid the AL settings for the Unix64 ABI. */
7815 if (mode == VOIDmode)
7816 return constm1_rtx;
7818 if (TARGET_IAMCU)
7820 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7821 bytes in registers. */
7822 if (bytes <= 8)
7823 goto pass_in_reg;
7824 return NULL_RTX;
7827 switch (mode)
7829 default:
7830 break;
7832 case BLKmode:
7833 if (bytes < 0)
7834 break;
7835 /* FALLTHRU */
7836 case DImode:
7837 case SImode:
7838 case HImode:
7839 case QImode:
7840 pass_in_reg:
7841 if (words <= cum->nregs)
7843 int regno = cum->regno;
7845 /* Fastcall allocates the first two DWORD (SImode) or
7846 smaller arguments to ECX and EDX if it isn't an
7847 aggregate type.  */
7848 if (cum->fastcall)
7850 if (mode == BLKmode
7851 || mode == DImode
7852 || (type && AGGREGATE_TYPE_P (type)))
7853 break;
7855 /* ECX not EAX is the first allocated register. */
7856 if (regno == AX_REG)
7857 regno = CX_REG;
7859 return gen_rtx_REG (mode, regno);
7861 break;
7863 case DFmode:
7864 if (cum->float_in_sse == -1)
7865 error_p = 1;
7866 if (cum->float_in_sse < 2)
7867 break;
7868 case SFmode:
7869 if (cum->float_in_sse == -1)
7870 error_p = 1;
7871 if (cum->float_in_sse < 1)
7872 break;
7873 /* FALLTHRU */
7874 case TImode:
7875 /* In 32bit, we pass TImode in xmm registers. */
7876 case V16QImode:
7877 case V8HImode:
7878 case V4SImode:
7879 case V2DImode:
7880 case V4SFmode:
7881 case V2DFmode:
7882 if (!type || !AGGREGATE_TYPE_P (type))
7884 if (cum->sse_nregs)
7885 return gen_reg_or_parallel (mode, orig_mode,
7886 cum->sse_regno + FIRST_SSE_REG);
7888 break;
7890 case OImode:
7891 case XImode:
7892 /* OImode and XImode shouldn't be used directly. */
7893 gcc_unreachable ();
7895 case V64QImode:
7896 case V32HImode:
7897 case V16SImode:
7898 case V8DImode:
7899 case V16SFmode:
7900 case V8DFmode:
7901 case V8SFmode:
7902 case V8SImode:
7903 case V32QImode:
7904 case V16HImode:
7905 case V4DFmode:
7906 case V4DImode:
7907 if (!type || !AGGREGATE_TYPE_P (type))
7909 if (cum->sse_nregs)
7910 return gen_reg_or_parallel (mode, orig_mode,
7911 cum->sse_regno + FIRST_SSE_REG);
7913 break;
7915 case V8QImode:
7916 case V4HImode:
7917 case V2SImode:
7918 case V2SFmode:
7919 case V1TImode:
7920 case V1DImode:
7921 if (!type || !AGGREGATE_TYPE_P (type))
7923 if (cum->mmx_nregs)
7924 return gen_reg_or_parallel (mode, orig_mode,
7925 cum->mmx_regno + FIRST_MMX_REG);
7927 break;
7929 if (error_p)
7931 cum->float_in_sse = 0;
7932 error ("calling %qD with SSE calling convention without "
7933 "SSE/SSE2 enabled", cum->decl);
7934 sorry ("this is a GCC bug that can be worked around by adding "
7935 "attribute used to function called");
7938 return NULL_RTX;
7941 static rtx
7942 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7943 machine_mode orig_mode, const_tree type, bool named)
7945 /* Handle a hidden AL argument containing number of registers
7946 for varargs x86-64 functions. */
7947 if (mode == VOIDmode)
7948 return GEN_INT (cum->maybe_vaarg
7949 ? (cum->sse_nregs < 0
7950 ? X86_64_SSE_REGPARM_MAX
7951 : cum->sse_regno)
7952 : -1);
7954 switch (mode)
7956 default:
7957 break;
7959 case V8SFmode:
7960 case V8SImode:
7961 case V32QImode:
7962 case V16HImode:
7963 case V4DFmode:
7964 case V4DImode:
7965 case V16SFmode:
7966 case V16SImode:
7967 case V64QImode:
7968 case V32HImode:
7969 case V8DFmode:
7970 case V8DImode:
7971 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7972 if (!named)
7973 return NULL;
7974 break;
7977 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7978 cum->sse_nregs,
7979 &x86_64_int_parameter_registers [cum->regno],
7980 cum->sse_regno);
7983 static rtx
7984 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7985 machine_mode orig_mode, bool named,
7986 HOST_WIDE_INT bytes)
7988 unsigned int regno;
7990 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7991 We use the value -2 to specify that the current function call is MS_ABI. */
7992 if (mode == VOIDmode)
7993 return GEN_INT (-2);
7995 /* If we've run out of registers, it goes on the stack. */
7996 if (cum->nregs == 0)
7997 return NULL_RTX;
7999 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8001 /* Only floating point modes are passed in anything but integer regs. */
8002 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8004 if (named)
8005 regno = cum->regno + FIRST_SSE_REG;
8006 else
8008 rtx t1, t2;
8010 /* Unnamed floating parameters are passed in both the
8011 SSE and integer registers. */
8012 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8013 t2 = gen_rtx_REG (mode, regno);
8014 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8015 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8016 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8019 /* Handle aggregate types passed in registers. */
8020 if (orig_mode == BLKmode)
8022 if (bytes > 0 && bytes <= 8)
8023 mode = (bytes > 4 ? DImode : SImode);
8024 if (mode == BLKmode)
8025 mode = DImode;
8028 return gen_reg_or_parallel (mode, orig_mode, regno);
8031 /* Return where to put the arguments to a function.
8032 Return zero to push the argument on the stack, or a hard register in which to store the argument.
8034 MODE is the argument's machine mode. TYPE is the data type of the
8035 argument. It is null for libcalls where that information may not be
8036 available. CUM gives information about the preceding args and about
8037 the function being called. NAMED is nonzero if this argument is a
8038 named parameter (otherwise it is an extra parameter matching an
8039 ellipsis). */
8041 static rtx
8042 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8043 const_tree type, bool named)
8045 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8046 machine_mode mode = omode;
8047 HOST_WIDE_INT bytes, words;
8048 rtx arg;
8050 /* All pointer bounds arguments are handled separately here. */
8051 if ((type && POINTER_BOUNDS_TYPE_P (type))
8052 || POINTER_BOUNDS_MODE_P (mode))
8054 /* Return NULL if bounds are forced to go in Bounds Table. */
8055 if (cum->bnds_in_bt)
8056 arg = NULL;
8057 /* Return the next available bound reg if any. */
8058 else if (cum->bnd_regno <= LAST_BND_REG)
8059 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8060 /* Return the next special slot number otherwise. */
8061 else
8062 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8064 return arg;
8067 if (mode == BLKmode)
8068 bytes = int_size_in_bytes (type);
8069 else
8070 bytes = GET_MODE_SIZE (mode);
8071 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8073 /* To simplify the code below, represent vector types with a vector mode
8074 even if MMX/SSE are not active. */
8075 if (type && TREE_CODE (type) == VECTOR_TYPE)
8076 mode = type_natural_mode (type, cum, false);
8078 if (TARGET_64BIT)
8080 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8082 if (call_abi == MS_ABI)
8083 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8084 else
8085 arg = function_arg_64 (cum, mode, omode, type, named);
8087 else
8088 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8090 return arg;
8093 /* A C expression that indicates when an argument must be passed by
8094 reference. If nonzero for an argument, a copy of that argument is
8095 made in memory and a pointer to the argument is passed instead of
8096 the argument itself. The pointer is passed in whatever way is
8097 appropriate for passing a pointer to that type. */
8099 static bool
8100 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8101 const_tree type, bool)
8103 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8105 /* Bounds are never passed by reference. */
8106 if ((type && POINTER_BOUNDS_TYPE_P (type))
8107 || POINTER_BOUNDS_MODE_P (mode))
8108 return false;
8110 if (TARGET_64BIT)
8112 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8114 /* See Windows x64 Software Convention. */
8115 if (call_abi == MS_ABI)
8117 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8119 if (type)
8121 /* Arrays are passed by reference. */
8122 if (TREE_CODE (type) == ARRAY_TYPE)
8123 return true;
8125 if (RECORD_OR_UNION_TYPE_P (type))
8127 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8128 are passed by reference. */
8129 msize = int_size_in_bytes (type);
8133 /* __m128 is passed by reference. */
8134 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8136 else if (type && int_size_in_bytes (type) == -1)
8137 return true;
8140 return false;
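/* Examples for the MS-ABI branch above (informal): a 3-byte
   struct { char c[3]; } and a 16-byte __m128 are both passed by
   reference, because their sizes are not 1, 2, 4 or 8 bytes; a plain
   long long (8 bytes) is passed directly in a register or stack slot.  */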
8143 /* Return true when TYPE should be 128bit aligned for 32bit argument
8144 passing ABI. XXX: This function is obsolete and is only used for
8145 checking psABI compatibility with previous versions of GCC. */
8147 static bool
8148 ix86_compat_aligned_value_p (const_tree type)
8150 machine_mode mode = TYPE_MODE (type);
8151 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8152 || mode == TDmode
8153 || mode == TFmode
8154 || mode == TCmode)
8155 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8156 return true;
8157 if (TYPE_ALIGN (type) < 128)
8158 return false;
8160 if (AGGREGATE_TYPE_P (type))
8162 /* Walk the aggregates recursively. */
8163 switch (TREE_CODE (type))
8165 case RECORD_TYPE:
8166 case UNION_TYPE:
8167 case QUAL_UNION_TYPE:
8169 tree field;
8171 /* Walk all the structure fields. */
8172 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8174 if (TREE_CODE (field) == FIELD_DECL
8175 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8176 return true;
8178 break;
8181 case ARRAY_TYPE:
8182 /* Just for use if some language passes arrays by value. */
8183 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8184 return true;
8185 break;
8187 default:
8188 gcc_unreachable ();
8191 return false;
8194 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8195 XXX: This function is obsolete and is only used for checking psABI
8196 compatibility with previous versions of GCC. */
8198 static unsigned int
8199 ix86_compat_function_arg_boundary (machine_mode mode,
8200 const_tree type, unsigned int align)
8202 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8203 natural boundaries. */
8204 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8206 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8207 make an exception for SSE modes since these require 128bit
8208 alignment.
8210 The handling here differs from field_alignment. ICC aligns MMX
8211 arguments to 4 byte boundaries, while structure fields are aligned
8212 to 8 byte boundaries. */
8213 if (!type)
8215 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8216 align = PARM_BOUNDARY;
8218 else
8220 if (!ix86_compat_aligned_value_p (type))
8221 align = PARM_BOUNDARY;
8224 if (align > BIGGEST_ALIGNMENT)
8225 align = BIGGEST_ALIGNMENT;
8226 return align;
8229 /* Return true when TYPE should be 128bit aligned for 32bit argument
8230 passing ABI. */
8232 static bool
8233 ix86_contains_aligned_value_p (const_tree type)
8235 machine_mode mode = TYPE_MODE (type);
8237 if (mode == XFmode || mode == XCmode)
8238 return false;
8240 if (TYPE_ALIGN (type) < 128)
8241 return false;
8243 if (AGGREGATE_TYPE_P (type))
8245 /* Walk the aggregates recursively. */
8246 switch (TREE_CODE (type))
8248 case RECORD_TYPE:
8249 case UNION_TYPE:
8250 case QUAL_UNION_TYPE:
8252 tree field;
8254 /* Walk all the structure fields. */
8255 for (field = TYPE_FIELDS (type);
8256 field;
8257 field = DECL_CHAIN (field))
8259 if (TREE_CODE (field) == FIELD_DECL
8260 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8261 return true;
8263 break;
8266 case ARRAY_TYPE:
8267 /* Just for use if some language passes arrays by value. */
8268 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8269 return true;
8270 break;
8272 default:
8273 gcc_unreachable ();
8276 else
8277 return TYPE_ALIGN (type) >= 128;
8279 return false;
8282 /* Gives the alignment boundary, in bits, of an argument with the
8283 specified mode and type. */
8285 static unsigned int
8286 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8288 unsigned int align;
8289 if (type)
8291 /* Since the main variant type is used for the call, convert the type
8292 to its main variant. */
8293 type = TYPE_MAIN_VARIANT (type);
8294 align = TYPE_ALIGN (type);
8296 else
8297 align = GET_MODE_ALIGNMENT (mode);
8298 if (align < PARM_BOUNDARY)
8299 align = PARM_BOUNDARY;
8300 else
8302 static bool warned;
8303 unsigned int saved_align = align;
8305 if (!TARGET_64BIT)
8307 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8308 if (!type)
8310 if (mode == XFmode || mode == XCmode)
8311 align = PARM_BOUNDARY;
8313 else if (!ix86_contains_aligned_value_p (type))
8314 align = PARM_BOUNDARY;
8316 if (align < 128)
8317 align = PARM_BOUNDARY;
8320 if (warn_psabi
8321 && !warned
8322 && align != ix86_compat_function_arg_boundary (mode, type,
8323 saved_align))
8325 warned = true;
8326 inform (input_location,
8327 "The ABI for passing parameters with %d-byte"
8328 " alignment has changed in GCC 4.6",
8329 align / BITS_PER_UNIT);
8333 return align;
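/* Informal examples for the 32-bit case above: a __m128 argument, or an
   aggregate containing a 128-bit aligned member, keeps its 128-bit
   alignment on the stack, whereas a plain double or an XFmode long double
   ends up at the 32-bit PARM_BOUNDARY, matching the historical i386
   calling convention.  */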
8336 /* Return true if N is a possible register number of function value. */
8338 static bool
8339 ix86_function_value_regno_p (const unsigned int regno)
8341 switch (regno)
8343 case AX_REG:
8344 return true;
8345 case DX_REG:
8346 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8347 case DI_REG:
8348 case SI_REG:
8349 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8351 case BND0_REG:
8352 case BND1_REG:
8353 return chkp_function_instrumented_p (current_function_decl);
8355 /* Complex values are returned in %st(0)/%st(1) pair. */
8356 case ST0_REG:
8357 case ST1_REG:
8358 /* TODO: The function should depend on current function ABI but
8359 builtins.c would need updating then. Therefore we use the
8360 default ABI. */
8361 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8362 return false;
8363 return TARGET_FLOAT_RETURNS_IN_80387;
8365 /* Complex values are returned in %xmm0/%xmm1 pair. */
8366 case XMM0_REG:
8367 case XMM1_REG:
8368 return TARGET_SSE;
8370 case MM0_REG:
8371 if (TARGET_MACHO || TARGET_64BIT)
8372 return false;
8373 return TARGET_MMX;
8376 return false;
8379 /* Define how to find the value returned by a function.
8380 VALTYPE is the data type of the value (as a tree).
8381 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8382 otherwise, FUNC is 0. */
8384 static rtx
8385 function_value_32 (machine_mode orig_mode, machine_mode mode,
8386 const_tree fntype, const_tree fn)
8388 unsigned int regno;
8390 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8391 we normally prevent this case when mmx is not available. However
8392 some ABIs may require the result to be returned like DImode. */
8393 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8394 regno = FIRST_MMX_REG;
8396 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8397 we prevent this case when sse is not available. However some ABIs
8398 may require the result to be returned like integer TImode. */
8399 else if (mode == TImode
8400 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8401 regno = FIRST_SSE_REG;
8403 /* 32-byte vector modes in %ymm0. */
8404 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8405 regno = FIRST_SSE_REG;
8407 /* 64-byte vector modes in %zmm0. */
8408 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8409 regno = FIRST_SSE_REG;
8411 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8412 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8413 regno = FIRST_FLOAT_REG;
8414 else
8415 /* Most things go in %eax. */
8416 regno = AX_REG;
8418 /* Override FP return register with %xmm0 for local functions when
8419 SSE math is enabled or for functions with sseregparm attribute. */
8420 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8422 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8423 if (sse_level == -1)
8425 error ("calling %qD with SSE caling convention without "
8426 "SSE/SSE2 enabled", fn);
8427 sorry ("this is a GCC bug that can be worked around by adding "
8428 "attribute used to function called");
8430 else if ((sse_level >= 1 && mode == SFmode)
8431 || (sse_level == 2 && mode == DFmode))
8432 regno = FIRST_SSE_REG;
8435 /* OImode shouldn't be used directly. */
8436 gcc_assert (mode != OImode);
8438 return gen_rtx_REG (orig_mode, regno);
8441 static rtx
8442 function_value_64 (machine_mode orig_mode, machine_mode mode,
8443 const_tree valtype)
8445 rtx ret;
8447 /* Handle libcalls, which don't provide a type node. */
8448 if (valtype == NULL)
8450 unsigned int regno;
8452 switch (mode)
8454 case SFmode:
8455 case SCmode:
8456 case DFmode:
8457 case DCmode:
8458 case TFmode:
8459 case SDmode:
8460 case DDmode:
8461 case TDmode:
8462 regno = FIRST_SSE_REG;
8463 break;
8464 case XFmode:
8465 case XCmode:
8466 regno = FIRST_FLOAT_REG;
8467 break;
8468 case TCmode:
8469 return NULL;
8470 default:
8471 regno = AX_REG;
8474 return gen_rtx_REG (mode, regno);
8476 else if (POINTER_TYPE_P (valtype))
8478 /* Pointers are always returned in word_mode. */
8479 mode = word_mode;
8482 ret = construct_container (mode, orig_mode, valtype, 1,
8483 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8484 x86_64_int_return_registers, 0);
8486 /* For zero-sized structures, construct_container returns NULL, but we
8487 need to keep the rest of the compiler happy by returning a meaningful value. */
8488 if (!ret)
8489 ret = gen_rtx_REG (orig_mode, AX_REG);
8491 return ret;
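/* As a sketch of a common SysV case handled by construct_container
   above (assuming the usual eightbyte classification rules):

       struct s { long i; double d; };     => returned in %rax and %xmm0

   i.e. a PARALLEL pairing an integer register with an SSE register,
   while a zero-sized aggregate falls back to the %rax default.  */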
8494 static rtx
8495 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8496 const_tree valtype)
8498 unsigned int regno = AX_REG;
8500 if (TARGET_SSE)
8502 switch (GET_MODE_SIZE (mode))
8504 case 16:
8505 if (valtype != NULL_TREE
8506 && !VECTOR_INTEGER_TYPE_P (valtype)
8508 && !INTEGRAL_TYPE_P (valtype)
8509 && !VECTOR_FLOAT_TYPE_P (valtype))
8510 break;
8511 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8512 && !COMPLEX_MODE_P (mode))
8513 regno = FIRST_SSE_REG;
8514 break;
8515 case 8:
8516 case 4:
8517 if (mode == SFmode || mode == DFmode)
8518 regno = FIRST_SSE_REG;
8519 break;
8520 default:
8521 break;
8524 return gen_rtx_REG (orig_mode, regno);
8527 static rtx
8528 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8529 machine_mode orig_mode, machine_mode mode)
8531 const_tree fn, fntype;
8533 fn = NULL_TREE;
8534 if (fntype_or_decl && DECL_P (fntype_or_decl))
8535 fn = fntype_or_decl;
8536 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8538 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8539 || POINTER_BOUNDS_MODE_P (mode))
8540 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8541 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8542 return function_value_ms_64 (orig_mode, mode, valtype);
8543 else if (TARGET_64BIT)
8544 return function_value_64 (orig_mode, mode, valtype);
8545 else
8546 return function_value_32 (orig_mode, mode, fntype, fn);
8549 static rtx
8550 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8552 machine_mode mode, orig_mode;
8554 orig_mode = TYPE_MODE (valtype);
8555 mode = type_natural_mode (valtype, NULL, true);
8556 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8559 /* Return an RTX representing a place where a function returns
8560 or receives pointer bounds, or NULL if no bounds are returned.
8562 VALTYPE is the data type of the value returned by the function.
8564 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8565 or FUNCTION_TYPE of the function.
8567 If OUTGOING is false, return a place in which the caller will
8568 see the return value. Otherwise, return a place where a
8569 function returns a value. */
8571 static rtx
8572 ix86_function_value_bounds (const_tree valtype,
8573 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8574 bool outgoing ATTRIBUTE_UNUSED)
8576 rtx res = NULL_RTX;
8578 if (BOUNDED_TYPE_P (valtype))
8579 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8580 else if (chkp_type_has_pointer (valtype))
8582 bitmap slots;
8583 rtx bounds[2];
8584 bitmap_iterator bi;
8585 unsigned i, bnd_no = 0;
8587 bitmap_obstack_initialize (NULL);
8588 slots = BITMAP_ALLOC (NULL);
8589 chkp_find_bound_slots (valtype, slots);
8591 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8593 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8594 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8595 gcc_assert (bnd_no < 2);
8596 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8599 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8601 BITMAP_FREE (slots);
8602 bitmap_obstack_release (NULL);
8604 else
8605 res = NULL_RTX;
8607 return res;
8610 /* Pointer function arguments and return values are promoted to
8611 word_mode. */
8613 static machine_mode
8614 ix86_promote_function_mode (const_tree type, machine_mode mode,
8615 int *punsignedp, const_tree fntype,
8616 int for_return)
8618 if (type != NULL_TREE && POINTER_TYPE_P (type))
8620 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8621 return word_mode;
8623 return default_promote_function_mode (type, mode, punsignedp, fntype,
8624 for_return);
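/* For example (a sketch, relevant only when ptr_mode is narrower than
   word_mode, as on x32): a 32-bit pointer argument or return value is
   zero-extended and carried around in 64-bit word_mode registers, which
   is what the POINTERS_EXTEND_UNSIGNED setting above requests.  */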
8627 /* Return true if a structure, union or array with MODE containing FIELD
8628 should be accessed using BLKmode. */
8630 static bool
8631 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8633 /* Union with XFmode must be in BLKmode. */
8634 return (mode == XFmode
8635 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8636 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8640 ix86_libcall_value (machine_mode mode)
8642 return ix86_function_value_1 (NULL, NULL, mode, mode);
8645 /* Return true iff type is returned in memory. */
8647 static bool
8648 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8650 #ifdef SUBTARGET_RETURN_IN_MEMORY
8651 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8652 #else
8653 const machine_mode mode = type_natural_mode (type, NULL, true);
8654 HOST_WIDE_INT size;
8656 if (POINTER_BOUNDS_TYPE_P (type))
8657 return false;
8659 if (TARGET_64BIT)
8661 if (ix86_function_type_abi (fntype) == MS_ABI)
8663 size = int_size_in_bytes (type);
8665 /* __m128 is returned in xmm0. */
8666 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8667 || INTEGRAL_TYPE_P (type)
8668 || VECTOR_FLOAT_TYPE_P (type))
8669 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8670 && !COMPLEX_MODE_P (mode)
8671 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8672 return false;
8674 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8675 return size != 1 && size != 2 && size != 4 && size != 8;
8677 else
8679 int needed_intregs, needed_sseregs;
8681 return examine_argument (mode, type, 1,
8682 &needed_intregs, &needed_sseregs);
8685 else
8687 size = int_size_in_bytes (type);
8689 /* Intel MCU psABI returns scalars and aggregates no larger than 8
8690 bytes in registers. */
8691 if (TARGET_IAMCU)
8692 return size > 8;
8694 if (mode == BLKmode)
8695 return true;
8697 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8698 return false;
8700 if (VECTOR_MODE_P (mode) || mode == TImode)
8702 /* User-created vectors small enough to fit in EAX. */
8703 if (size < 8)
8704 return false;
8706 /* Unless the ABI prescribes otherwise,
8707 MMX/3dNow values are returned in MM0 if available. */
8709 if (size == 8)
8710 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8712 /* SSE values are returned in XMM0 if available. */
8713 if (size == 16)
8714 return !TARGET_SSE;
8716 /* AVX values are returned in YMM0 if available. */
8717 if (size == 32)
8718 return !TARGET_AVX;
8720 /* AVX512F values are returned in ZMM0 if available. */
8721 if (size == 64)
8722 return !TARGET_AVX512F;
8725 if (mode == XFmode)
8726 return false;
8728 if (size > 12)
8729 return true;
8731 /* OImode shouldn't be used directly. */
8732 gcc_assert (mode != OImode);
8734 return false;
8736 #endif
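/* A few illustrative consequences of the checks above (default flags
   assumed): on IA-32 a 12-byte struct of three ints is returned in
   memory (BLKmode), a 16-byte __m128 comes back in %xmm0 when SSE is
   enabled, and with -miamcu anything up to 8 bytes stays in registers.  */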
8740 /* Create the va_list data type. */
8742 /* Returns the calling convention specific va_list data type.
8743 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8745 static tree
8746 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8748 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8750 /* For i386 we use plain pointer to argument area. */
8751 if (!TARGET_64BIT || abi == MS_ABI)
8752 return build_pointer_type (char_type_node);
8754 record = lang_hooks.types.make_type (RECORD_TYPE);
8755 type_decl = build_decl (BUILTINS_LOCATION,
8756 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8758 f_gpr = build_decl (BUILTINS_LOCATION,
8759 FIELD_DECL, get_identifier ("gp_offset"),
8760 unsigned_type_node);
8761 f_fpr = build_decl (BUILTINS_LOCATION,
8762 FIELD_DECL, get_identifier ("fp_offset"),
8763 unsigned_type_node);
8764 f_ovf = build_decl (BUILTINS_LOCATION,
8765 FIELD_DECL, get_identifier ("overflow_arg_area"),
8766 ptr_type_node);
8767 f_sav = build_decl (BUILTINS_LOCATION,
8768 FIELD_DECL, get_identifier ("reg_save_area"),
8769 ptr_type_node);
8771 va_list_gpr_counter_field = f_gpr;
8772 va_list_fpr_counter_field = f_fpr;
8774 DECL_FIELD_CONTEXT (f_gpr) = record;
8775 DECL_FIELD_CONTEXT (f_fpr) = record;
8776 DECL_FIELD_CONTEXT (f_ovf) = record;
8777 DECL_FIELD_CONTEXT (f_sav) = record;
8779 TYPE_STUB_DECL (record) = type_decl;
8780 TYPE_NAME (record) = type_decl;
8781 TYPE_FIELDS (record) = f_gpr;
8782 DECL_CHAIN (f_gpr) = f_fpr;
8783 DECL_CHAIN (f_fpr) = f_ovf;
8784 DECL_CHAIN (f_ovf) = f_sav;
8786 layout_type (record);
8788 /* The correct type is an array type of one element. */
8789 return build_array_type (record, build_index_type (size_zero_node));
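/* For reference, the record built above corresponds to the familiar
   SysV x86-64 va_list layout (a sketch of the equivalent C type):

       typedef struct __va_list_tag {
         unsigned int gp_offset;        -- next GP slot in reg_save_area
         unsigned int fp_offset;        -- next SSE slot in reg_save_area
         void *overflow_arg_area;       -- stack-passed arguments
         void *reg_save_area;           -- spilled register arguments
       } __builtin_va_list[1];

   while the 32-bit and MS ABIs use a plain "char *" cursor instead.  */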
8792 /* Setup the builtin va_list data type and for 64-bit the additional
8793 calling convention specific va_list data types. */
8795 static tree
8796 ix86_build_builtin_va_list (void)
8798 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8800 /* Initialize abi specific va_list builtin types. */
8801 if (TARGET_64BIT)
8803 tree t;
8804 if (ix86_abi == MS_ABI)
8806 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8807 if (TREE_CODE (t) != RECORD_TYPE)
8808 t = build_variant_type_copy (t);
8809 sysv_va_list_type_node = t;
8811 else
8813 t = ret;
8814 if (TREE_CODE (t) != RECORD_TYPE)
8815 t = build_variant_type_copy (t);
8816 sysv_va_list_type_node = t;
8818 if (ix86_abi != MS_ABI)
8820 t = ix86_build_builtin_va_list_abi (MS_ABI);
8821 if (TREE_CODE (t) != RECORD_TYPE)
8822 t = build_variant_type_copy (t);
8823 ms_va_list_type_node = t;
8825 else
8827 t = ret;
8828 if (TREE_CODE (t) != RECORD_TYPE)
8829 t = build_variant_type_copy (t);
8830 ms_va_list_type_node = t;
8834 return ret;
8837 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8839 static void
8840 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8842 rtx save_area, mem;
8843 alias_set_type set;
8844 int i, max;
8846 /* GPR size of varargs save area. */
8847 if (cfun->va_list_gpr_size)
8848 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8849 else
8850 ix86_varargs_gpr_size = 0;
8852 /* FPR size of varargs save area. We don't need it if we don't pass
8853 anything in SSE registers. */
8854 if (TARGET_SSE && cfun->va_list_fpr_size)
8855 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8856 else
8857 ix86_varargs_fpr_size = 0;
8859 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8860 return;
8862 save_area = frame_pointer_rtx;
8863 set = get_varargs_alias_set ();
8865 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8866 if (max > X86_64_REGPARM_MAX)
8867 max = X86_64_REGPARM_MAX;
8869 for (i = cum->regno; i < max; i++)
8871 mem = gen_rtx_MEM (word_mode,
8872 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8873 MEM_NOTRAP_P (mem) = 1;
8874 set_mem_alias_set (mem, set);
8875 emit_move_insn (mem,
8876 gen_rtx_REG (word_mode,
8877 x86_64_int_parameter_registers[i]));
8880 if (ix86_varargs_fpr_size)
8882 machine_mode smode;
8883 rtx_code_label *label;
8884 rtx test;
8886 /* Now emit code to save SSE registers. The AX parameter contains number
8887 of SSE parameter registers used to call this function, though all we
8888 actually check here is the zero/non-zero status. */
8890 label = gen_label_rtx ();
8891 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8892 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8893 label));
8895 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8896 we used movdqa (i.e. TImode) instead? Perhaps even better would
8897 be if we could determine the real mode of the data, via a hook
8898 into pass_stdarg. Ignore all that for now. */
8899 smode = V4SFmode;
8900 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8901 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8903 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8904 if (max > X86_64_SSE_REGPARM_MAX)
8905 max = X86_64_SSE_REGPARM_MAX;
8907 for (i = cum->sse_regno; i < max; ++i)
8909 mem = plus_constant (Pmode, save_area,
8910 i * 16 + ix86_varargs_gpr_size);
8911 mem = gen_rtx_MEM (smode, mem);
8912 MEM_NOTRAP_P (mem) = 1;
8913 set_mem_alias_set (mem, set);
8914 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8916 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8919 emit_label (label);
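/* The save area laid out above (assuming the usual SysV limits of 6 GP
   and 8 SSE argument registers) is therefore roughly:

       bytes   0.. 47   %rdi, %rsi, %rdx, %rcx, %r8, %r9   (8 bytes each)
       bytes  48..175   %xmm0 .. %xmm7                     (16 bytes each)

   The SSE half is skipped entirely when %al says no SSE arguments were
   passed.  */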
8923 static void
8924 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8926 alias_set_type set = get_varargs_alias_set ();
8927 int i;
8929 /* Reset to zero, as there might be a SysV va_arg used
8930 before. */
8931 ix86_varargs_gpr_size = 0;
8932 ix86_varargs_fpr_size = 0;
8934 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8936 rtx reg, mem;
8938 mem = gen_rtx_MEM (Pmode,
8939 plus_constant (Pmode, virtual_incoming_args_rtx,
8940 i * UNITS_PER_WORD));
8941 MEM_NOTRAP_P (mem) = 1;
8942 set_mem_alias_set (mem, set);
8944 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8945 emit_move_insn (mem, reg);
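/* Here the MS-ABI register arguments are simply spilled back into their
   caller-allocated home slots (the 32-byte "shadow space" directly above
   the return address), so va_arg can walk the stack with a plain pointer:
   roughly, %rcx, %rdx, %r8 and %r9 go to incoming-args + 0, 8, 16, 24.  */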
8949 static void
8950 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8951 tree type, int *, int no_rtl)
8953 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8954 CUMULATIVE_ARGS next_cum;
8955 tree fntype;
8957 /* This argument doesn't appear to be used anymore, which is good,
8958 because the old code here didn't suppress rtl generation. */
8959 gcc_assert (!no_rtl);
8961 if (!TARGET_64BIT)
8962 return;
8964 fntype = TREE_TYPE (current_function_decl);
8966 /* For varargs, we do not want to skip the dummy va_dcl argument.
8967 For stdargs, we do want to skip the last named argument. */
8968 next_cum = *cum;
8969 if (stdarg_p (fntype))
8970 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8971 true);
8973 if (cum->call_abi == MS_ABI)
8974 setup_incoming_varargs_ms_64 (&next_cum);
8975 else
8976 setup_incoming_varargs_64 (&next_cum);
8979 static void
8980 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8981 enum machine_mode mode,
8982 tree type,
8983 int *pretend_size ATTRIBUTE_UNUSED,
8984 int no_rtl)
8986 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8987 CUMULATIVE_ARGS next_cum;
8988 tree fntype;
8989 rtx save_area;
8990 int bnd_reg, i, max;
8992 gcc_assert (!no_rtl);
8994 /* Do nothing if we use plain pointer to argument area. */
8995 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8996 return;
8998 fntype = TREE_TYPE (current_function_decl);
9000 /* For varargs, we do not want to skip the dummy va_dcl argument.
9001 For stdargs, we do want to skip the last named argument. */
9002 next_cum = *cum;
9003 if (stdarg_p (fntype))
9004 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
9005 true);
9006 save_area = frame_pointer_rtx;
9008 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9009 if (max > X86_64_REGPARM_MAX)
9010 max = X86_64_REGPARM_MAX;
9012 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
9013 if (chkp_function_instrumented_p (current_function_decl))
9014 for (i = cum->regno; i < max; i++)
9016 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
9017 rtx ptr = gen_rtx_REG (Pmode,
9018 x86_64_int_parameter_registers[i]);
9019 rtx bounds;
9021 if (bnd_reg <= LAST_BND_REG)
9022 bounds = gen_rtx_REG (BNDmode, bnd_reg);
9023 else
9025 rtx ldx_addr =
9026 plus_constant (Pmode, arg_pointer_rtx,
9027 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
9028 bounds = gen_reg_rtx (BNDmode);
9029 emit_insn (BNDmode == BND64mode
9030 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9031 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9034 emit_insn (BNDmode == BND64mode
9035 ? gen_bnd64_stx (addr, ptr, bounds)
9036 : gen_bnd32_stx (addr, ptr, bounds));
9038 bnd_reg++;
9043 /* Checks if TYPE is of kind va_list char *. */
9045 static bool
9046 is_va_list_char_pointer (tree type)
9048 tree canonic;
9050 /* For 32-bit it is always true. */
9051 if (!TARGET_64BIT)
9052 return true;
9053 canonic = ix86_canonical_va_list_type (type);
9054 return (canonic == ms_va_list_type_node
9055 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9058 /* Implement va_start. */
9060 static void
9061 ix86_va_start (tree valist, rtx nextarg)
9063 HOST_WIDE_INT words, n_gpr, n_fpr;
9064 tree f_gpr, f_fpr, f_ovf, f_sav;
9065 tree gpr, fpr, ovf, sav, t;
9066 tree type;
9067 rtx ovf_rtx;
9069 if (flag_split_stack
9070 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9072 unsigned int scratch_regno;
9074 /* When we are splitting the stack, we can't refer to the stack
9075 arguments using internal_arg_pointer, because they may be on
9076 the old stack. The split stack prologue will arrange to
9077 leave a pointer to the old stack arguments in a scratch
9078 register, which we here copy to a pseudo-register. The split
9079 stack prologue can't set the pseudo-register directly because
9080 it (the prologue) runs before any registers have been saved. */
9082 scratch_regno = split_stack_prologue_scratch_regno ();
9083 if (scratch_regno != INVALID_REGNUM)
9085 rtx reg;
9086 rtx_insn *seq;
9088 reg = gen_reg_rtx (Pmode);
9089 cfun->machine->split_stack_varargs_pointer = reg;
9091 start_sequence ();
9092 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9093 seq = get_insns ();
9094 end_sequence ();
9096 push_topmost_sequence ();
9097 emit_insn_after (seq, entry_of_function ());
9098 pop_topmost_sequence ();
9102 /* Only 64bit target needs something special. */
9103 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9105 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9106 std_expand_builtin_va_start (valist, nextarg);
9107 else
9109 rtx va_r, next;
9111 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9112 next = expand_binop (ptr_mode, add_optab,
9113 cfun->machine->split_stack_varargs_pointer,
9114 crtl->args.arg_offset_rtx,
9115 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9116 convert_move (va_r, next, 0);
9118 /* Store zero bounds for va_list. */
9119 if (chkp_function_instrumented_p (current_function_decl))
9120 chkp_expand_bounds_reset_for_mem (valist,
9121 make_tree (TREE_TYPE (valist),
9122 next));
9125 return;
9128 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9129 f_fpr = DECL_CHAIN (f_gpr);
9130 f_ovf = DECL_CHAIN (f_fpr);
9131 f_sav = DECL_CHAIN (f_ovf);
9133 valist = build_simple_mem_ref (valist);
9134 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9135 /* The following should be folded into the MEM_REF offset. */
9136 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9137 f_gpr, NULL_TREE);
9138 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9139 f_fpr, NULL_TREE);
9140 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9141 f_ovf, NULL_TREE);
9142 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9143 f_sav, NULL_TREE);
9145 /* Count number of gp and fp argument registers used. */
9146 words = crtl->args.info.words;
9147 n_gpr = crtl->args.info.regno;
9148 n_fpr = crtl->args.info.sse_regno;
9150 if (cfun->va_list_gpr_size)
9152 type = TREE_TYPE (gpr);
9153 t = build2 (MODIFY_EXPR, type,
9154 gpr, build_int_cst (type, n_gpr * 8));
9155 TREE_SIDE_EFFECTS (t) = 1;
9156 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9159 if (TARGET_SSE && cfun->va_list_fpr_size)
9161 type = TREE_TYPE (fpr);
9162 t = build2 (MODIFY_EXPR, type, fpr,
9163 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9164 TREE_SIDE_EFFECTS (t) = 1;
9165 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9168 /* Find the overflow area. */
9169 type = TREE_TYPE (ovf);
9170 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9171 ovf_rtx = crtl->args.internal_arg_pointer;
9172 else
9173 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9174 t = make_tree (type, ovf_rtx);
9175 if (words != 0)
9176 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9178 /* Store zero bounds for overflow area pointer. */
9179 if (chkp_function_instrumented_p (current_function_decl))
9180 chkp_expand_bounds_reset_for_mem (ovf, t);
9182 t = build2 (MODIFY_EXPR, type, ovf, t);
9183 TREE_SIDE_EFFECTS (t) = 1;
9184 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9186 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9188 /* Find the register save area.
9189 The function prologue saves it right above the stack frame. */
9190 type = TREE_TYPE (sav);
9191 t = make_tree (type, frame_pointer_rtx);
9192 if (!ix86_varargs_gpr_size)
9193 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9195 /* Store zero bounds for save area pointer. */
9196 if (chkp_function_instrumented_p (current_function_decl))
9197 chkp_expand_bounds_reset_for_mem (sav, t);
9199 t = build2 (MODIFY_EXPR, type, sav, t);
9200 TREE_SIDE_EFFECTS (t) = 1;
9201 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
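/* As a concrete sketch (assuming the SysV limits used above), va_start
   in a function such as  int f (const char *fmt, ...)  that consumed one
   GP register for the named argument initializes the va_list roughly as:

       gp_offset         = 1 * 8;             -- 8, first free GP slot
       fp_offset         = 0 * 16 + 6 * 8;    -- 48, first SSE slot
       overflow_arg_area = incoming stack arguments;
       reg_save_area     = the area saved by the prologue code above.  */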
9205 /* Implement va_arg. */
9207 static tree
9208 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9209 gimple_seq *post_p)
9211 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9212 tree f_gpr, f_fpr, f_ovf, f_sav;
9213 tree gpr, fpr, ovf, sav, t;
9214 int size, rsize;
9215 tree lab_false, lab_over = NULL_TREE;
9216 tree addr, t2;
9217 rtx container;
9218 int indirect_p = 0;
9219 tree ptrtype;
9220 machine_mode nat_mode;
9221 unsigned int arg_boundary;
9223 /* Only 64bit target needs something special. */
9224 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9225 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9227 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9228 f_fpr = DECL_CHAIN (f_gpr);
9229 f_ovf = DECL_CHAIN (f_fpr);
9230 f_sav = DECL_CHAIN (f_ovf);
9232 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9233 valist, f_gpr, NULL_TREE);
9235 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9236 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9237 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9239 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9240 if (indirect_p)
9241 type = build_pointer_type (type);
9242 size = int_size_in_bytes (type);
9243 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9245 nat_mode = type_natural_mode (type, NULL, false);
9246 switch (nat_mode)
9248 case V8SFmode:
9249 case V8SImode:
9250 case V32QImode:
9251 case V16HImode:
9252 case V4DFmode:
9253 case V4DImode:
9254 case V16SFmode:
9255 case V16SImode:
9256 case V64QImode:
9257 case V32HImode:
9258 case V8DFmode:
9259 case V8DImode:
9260 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
9261 if (!TARGET_64BIT_MS_ABI)
9263 container = NULL;
9264 break;
9267 default:
9268 container = construct_container (nat_mode, TYPE_MODE (type),
9269 type, 0, X86_64_REGPARM_MAX,
9270 X86_64_SSE_REGPARM_MAX, intreg,
9272 break;
9275 /* Pull the value out of the saved registers. */
9277 addr = create_tmp_var (ptr_type_node, "addr");
9279 if (container)
9281 int needed_intregs, needed_sseregs;
9282 bool need_temp;
9283 tree int_addr, sse_addr;
9285 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9286 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9288 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9290 need_temp = (!REG_P (container)
9291 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9292 || TYPE_ALIGN (type) > 128));
9294 /* In case we are passing a structure, verify that it is a consecutive block
9295 in the register save area. If not, we need to do moves. */
9296 if (!need_temp && !REG_P (container))
9298 /* Verify that all registers are strictly consecutive */
9299 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9301 int i;
9303 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9305 rtx slot = XVECEXP (container, 0, i);
9306 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9307 || INTVAL (XEXP (slot, 1)) != i * 16)
9308 need_temp = true;
9311 else
9313 int i;
9315 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9317 rtx slot = XVECEXP (container, 0, i);
9318 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9319 || INTVAL (XEXP (slot, 1)) != i * 8)
9320 need_temp = true;
9324 if (!need_temp)
9326 int_addr = addr;
9327 sse_addr = addr;
9329 else
9331 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9332 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9335 /* First ensure that we fit completely in registers. */
9336 if (needed_intregs)
9338 t = build_int_cst (TREE_TYPE (gpr),
9339 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9340 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9341 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9342 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9343 gimplify_and_add (t, pre_p);
9345 if (needed_sseregs)
9347 t = build_int_cst (TREE_TYPE (fpr),
9348 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9349 + X86_64_REGPARM_MAX * 8);
9350 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9351 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9352 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9353 gimplify_and_add (t, pre_p);
9356 /* Compute index to start of area used for integer regs. */
9357 if (needed_intregs)
9359 /* int_addr = gpr + sav; */
9360 t = fold_build_pointer_plus (sav, gpr);
9361 gimplify_assign (int_addr, t, pre_p);
9363 if (needed_sseregs)
9365 /* sse_addr = fpr + sav; */
9366 t = fold_build_pointer_plus (sav, fpr);
9367 gimplify_assign (sse_addr, t, pre_p);
9369 if (need_temp)
9371 int i, prev_size = 0;
9372 tree temp = create_tmp_var (type, "va_arg_tmp");
9374 /* addr = &temp; */
9375 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9376 gimplify_assign (addr, t, pre_p);
9378 for (i = 0; i < XVECLEN (container, 0); i++)
9380 rtx slot = XVECEXP (container, 0, i);
9381 rtx reg = XEXP (slot, 0);
9382 machine_mode mode = GET_MODE (reg);
9383 tree piece_type;
9384 tree addr_type;
9385 tree daddr_type;
9386 tree src_addr, src;
9387 int src_offset;
9388 tree dest_addr, dest;
9389 int cur_size = GET_MODE_SIZE (mode);
9391 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9392 prev_size = INTVAL (XEXP (slot, 1));
9393 if (prev_size + cur_size > size)
9395 cur_size = size - prev_size;
9396 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9397 if (mode == BLKmode)
9398 mode = QImode;
9400 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9401 if (mode == GET_MODE (reg))
9402 addr_type = build_pointer_type (piece_type);
9403 else
9404 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9405 true);
9406 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9407 true);
9409 if (SSE_REGNO_P (REGNO (reg)))
9411 src_addr = sse_addr;
9412 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9414 else
9416 src_addr = int_addr;
9417 src_offset = REGNO (reg) * 8;
9419 src_addr = fold_convert (addr_type, src_addr);
9420 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9422 dest_addr = fold_convert (daddr_type, addr);
9423 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9424 if (cur_size == GET_MODE_SIZE (mode))
9426 src = build_va_arg_indirect_ref (src_addr);
9427 dest = build_va_arg_indirect_ref (dest_addr);
9429 gimplify_assign (dest, src, pre_p);
9431 else
9433 tree copy
9434 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9435 3, dest_addr, src_addr,
9436 size_int (cur_size));
9437 gimplify_and_add (copy, pre_p);
9439 prev_size += cur_size;
9443 if (needed_intregs)
9445 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9446 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9447 gimplify_assign (gpr, t, pre_p);
9450 if (needed_sseregs)
9452 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9453 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9454 gimplify_assign (unshare_expr (fpr), t, pre_p);
9457 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9459 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9462 /* ... otherwise out of the overflow area. */
9464 /* When we align a parameter on the stack for the caller, if the parameter
9465 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9466 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. The callee must match
9467 the caller here. */
9468 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9469 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9470 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9472 /* Care for on-stack alignment if needed. */
9473 if (arg_boundary <= 64 || size == 0)
9474 t = ovf;
9475 else
9477 HOST_WIDE_INT align = arg_boundary / 8;
9478 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9479 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9480 build_int_cst (TREE_TYPE (t), -align));
9483 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9484 gimplify_assign (addr, t, pre_p);
9486 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9487 gimplify_assign (unshare_expr (ovf), t, pre_p);
9489 if (container)
9490 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9492 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9493 addr = fold_convert (ptrtype, addr);
9495 if (indirect_p)
9496 addr = build_va_arg_indirect_ref (addr);
9497 return build_va_arg_indirect_ref (addr);
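/* The GIMPLE emitted above for a register-eligible scalar, e.g.
   va_arg (ap, int), boils down to roughly (a sketch, not literal output):

       if (ap->gp_offset >= 6 * 8) goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area, suitably aligned;
       ap->overflow_arg_area = addr + 8;
     done:
       result = the int stored at addr;                                  */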
9500 /* Return true if OPNUM's MEM should be matched
9501 in movabs* patterns. */
9503 bool
9504 ix86_check_movabs (rtx insn, int opnum)
9506 rtx set, mem;
9508 set = PATTERN (insn);
9509 if (GET_CODE (set) == PARALLEL)
9510 set = XVECEXP (set, 0, 0);
9511 gcc_assert (GET_CODE (set) == SET);
9512 mem = XEXP (set, opnum);
9513 while (GET_CODE (mem) == SUBREG)
9514 mem = SUBREG_REG (mem);
9515 gcc_assert (MEM_P (mem));
9516 return volatile_ok || !MEM_VOLATILE_P (mem);
9519 /* Initialize the table of extra 80387 mathematical constants. */
9521 static void
9522 init_ext_80387_constants (void)
9524 static const char * cst[5] =
9526 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9527 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9528 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9529 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9530 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9532 int i;
9534 for (i = 0; i < 5; i++)
9536 real_from_string (&ext_80387_constants_table[i], cst[i]);
9537 /* Ensure each constant is rounded to XFmode precision. */
9538 real_convert (&ext_80387_constants_table[i],
9539 XFmode, &ext_80387_constants_table[i]);
9542 ext_80387_constants_init = 1;
9545 /* Return non-zero if the constant is something that
9546 can be loaded with a special instruction. */
9549 standard_80387_constant_p (rtx x)
9551 machine_mode mode = GET_MODE (x);
9553 REAL_VALUE_TYPE r;
9555 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9556 return -1;
9558 if (x == CONST0_RTX (mode))
9559 return 1;
9560 if (x == CONST1_RTX (mode))
9561 return 2;
9563 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9565 /* For XFmode constants, try to find a special 80387 instruction when
9566 optimizing for size or on those CPUs that benefit from them. */
9567 if (mode == XFmode
9568 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9570 int i;
9572 if (! ext_80387_constants_init)
9573 init_ext_80387_constants ();
9575 for (i = 0; i < 5; i++)
9576 if (real_identical (&r, &ext_80387_constants_table[i]))
9577 return i + 3;
9580 /* Load of the constant -0.0 or -1.0 will be split as
9581 fldz;fchs or fld1;fchs sequence. */
9582 if (real_isnegzero (&r))
9583 return 8;
9584 if (real_identical (&r, &dconstm1))
9585 return 9;
9587 return 0;
9590 /* Return the opcode of the special instruction to be used to load
9591 the constant X. */
9593 const char *
9594 standard_80387_constant_opcode (rtx x)
9596 switch (standard_80387_constant_p (x))
9598 case 1:
9599 return "fldz";
9600 case 2:
9601 return "fld1";
9602 case 3:
9603 return "fldlg2";
9604 case 4:
9605 return "fldln2";
9606 case 5:
9607 return "fldl2e";
9608 case 6:
9609 return "fldl2t";
9610 case 7:
9611 return "fldpi";
9612 case 8:
9613 case 9:
9614 return "#";
9615 default:
9616 gcc_unreachable ();
9620 /* Return the CONST_DOUBLE representing the 80387 constant that is
9621 loaded by the specified special instruction. The argument IDX
9622 matches the return value from standard_80387_constant_p. */
9625 standard_80387_constant_rtx (int idx)
9627 int i;
9629 if (! ext_80387_constants_init)
9630 init_ext_80387_constants ();
9632 switch (idx)
9634 case 3:
9635 case 4:
9636 case 5:
9637 case 6:
9638 case 7:
9639 i = idx - 3;
9640 break;
9642 default:
9643 gcc_unreachable ();
9646 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9647 XFmode);
9650 /* Return 1 if X is all 0s and 2 if X is all 1s
9651 in a supported SSE/AVX vector mode. */
9654 standard_sse_constant_p (rtx x)
9656 machine_mode mode;
9658 if (!TARGET_SSE)
9659 return 0;
9661 mode = GET_MODE (x);
9663 if (x == const0_rtx || x == CONST0_RTX (mode))
9664 return 1;
9665 if (vector_all_ones_operand (x, mode))
9666 switch (mode)
9668 case V16QImode:
9669 case V8HImode:
9670 case V4SImode:
9671 case V2DImode:
9672 if (TARGET_SSE2)
9673 return 2;
9674 case V32QImode:
9675 case V16HImode:
9676 case V8SImode:
9677 case V4DImode:
9678 if (TARGET_AVX2)
9679 return 2;
9680 case V64QImode:
9681 case V32HImode:
9682 case V16SImode:
9683 case V8DImode:
9684 if (TARGET_AVX512F)
9685 return 2;
9686 default:
9687 break;
9690 return 0;
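/* Both classes let the move patterns synthesize the constant in a single
   instruction instead of loading it from the constant pool: all-zeros via
   a pxor/vpxor-style idiom and all-ones via pcmpeqd, as emitted by
   standard_sse_constant_opcode below.  */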
9693 /* Return the opcode of the special instruction to be used to load
9694 the constant X. */
9696 const char *
9697 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9699 switch (standard_sse_constant_p (x))
9701 case 1:
9702 switch (get_attr_mode (insn))
9704 case MODE_XI:
9705 return "vpxord\t%g0, %g0, %g0";
9706 case MODE_V16SF:
9707 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9708 : "vpxord\t%g0, %g0, %g0";
9709 case MODE_V8DF:
9710 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9711 : "vpxorq\t%g0, %g0, %g0";
9712 case MODE_TI:
9713 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9714 : "%vpxor\t%0, %d0";
9715 case MODE_V2DF:
9716 return "%vxorpd\t%0, %d0";
9717 case MODE_V4SF:
9718 return "%vxorps\t%0, %d0";
9720 case MODE_OI:
9721 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9722 : "vpxor\t%x0, %x0, %x0";
9723 case MODE_V4DF:
9724 return "vxorpd\t%x0, %x0, %x0";
9725 case MODE_V8SF:
9726 return "vxorps\t%x0, %x0, %x0";
9728 default:
9729 break;
9732 case 2:
9733 if (TARGET_AVX512VL
9734 || get_attr_mode (insn) == MODE_XI
9735 || get_attr_mode (insn) == MODE_V8DF
9736 || get_attr_mode (insn) == MODE_V16SF)
9737 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9738 if (TARGET_AVX)
9739 return "vpcmpeqd\t%0, %0, %0";
9740 else
9741 return "pcmpeqd\t%0, %0";
9743 default:
9744 break;
9746 gcc_unreachable ();
9749 /* Returns true if OP contains a symbol reference. */
9751 bool
9752 symbolic_reference_mentioned_p (rtx op)
9754 const char *fmt;
9755 int i;
9757 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9758 return true;
9760 fmt = GET_RTX_FORMAT (GET_CODE (op));
9761 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9763 if (fmt[i] == 'E')
9765 int j;
9767 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9768 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9769 return true;
9772 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9773 return true;
9776 return false;
9779 /* Return true if it is appropriate to emit `ret' instructions in the
9780 body of a function. Do this only if the epilogue is simple, needing a
9781 couple of insns. Prior to reloading, we can't tell how many registers
9782 must be saved, so return false then. Return false if there is no frame
9783 marker to de-allocate. */
9785 bool
9786 ix86_can_use_return_insn_p (void)
9788 struct ix86_frame frame;
9790 if (! reload_completed || frame_pointer_needed)
9791 return 0;
9793 /* Don't allow more than 32k pop, since that's all we can do
9794 with one instruction. */
9795 if (crtl->args.pops_args && crtl->args.size >= 32768)
9796 return 0;
9798 ix86_compute_frame_layout (&frame);
9799 return (frame.stack_pointer_offset == UNITS_PER_WORD
9800 && (frame.nregs + frame.nsseregs) == 0);
9803 /* Value should be nonzero if functions must have frame pointers.
9804 Zero means the frame pointer need not be set up (and parms may
9805 be accessed via the stack pointer) in functions that seem suitable. */
9807 static bool
9808 ix86_frame_pointer_required (void)
9810 /* If we accessed previous frames, then the generated code expects
9811 to be able to access the saved ebp value in our frame. */
9812 if (cfun->machine->accesses_prev_frame)
9813 return true;
9815 /* Several x86 OSes need a frame pointer for other reasons,
9816 usually pertaining to setjmp. */
9817 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9818 return true;
9820 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9821 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9822 return true;
9824 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9825 stack allocation is 4GB. */
9826 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9827 return true;
9829 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9830 turns off the frame pointer by default. Turn it back on now if
9831 we've not got a leaf function. */
9832 if (TARGET_OMIT_LEAF_FRAME_POINTER
9833 && (!crtl->is_leaf
9834 || ix86_current_function_calls_tls_descriptor))
9835 return true;
9837 if (crtl->profile && !flag_fentry)
9838 return true;
9840 return false;
9843 /* Record that the current function accesses previous call frames. */
9845 void
9846 ix86_setup_frame_addresses (void)
9848 cfun->machine->accesses_prev_frame = 1;
9851 #ifndef USE_HIDDEN_LINKONCE
9852 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9853 # define USE_HIDDEN_LINKONCE 1
9854 # else
9855 # define USE_HIDDEN_LINKONCE 0
9856 # endif
9857 #endif
9859 static int pic_labels_used;
9861 /* Fills in the label name that should be used for a pc thunk for
9862 the given register. */
9864 static void
9865 get_pc_thunk_name (char name[32], unsigned int regno)
9867 gcc_assert (!TARGET_64BIT);
9869 if (USE_HIDDEN_LINKONCE)
9870 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9871 else
9872 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9876 /* This function generates the pc thunks used by -fpic code: each thunk
9877 loads its target register with the return address of the caller and then returns. */
9879 static void
9880 ix86_code_end (void)
9882 rtx xops[2];
9883 int regno;
9885 for (regno = AX_REG; regno <= SP_REG; regno++)
9887 char name[32];
9888 tree decl;
9890 if (!(pic_labels_used & (1 << regno)))
9891 continue;
9893 get_pc_thunk_name (name, regno);
9895 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9896 get_identifier (name),
9897 build_function_type_list (void_type_node, NULL_TREE));
9898 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9899 NULL_TREE, void_type_node);
9900 TREE_PUBLIC (decl) = 1;
9901 TREE_STATIC (decl) = 1;
9902 DECL_IGNORED_P (decl) = 1;
9904 #if TARGET_MACHO
9905 if (TARGET_MACHO)
9907 switch_to_section (darwin_sections[text_coal_section]);
9908 fputs ("\t.weak_definition\t", asm_out_file);
9909 assemble_name (asm_out_file, name);
9910 fputs ("\n\t.private_extern\t", asm_out_file);
9911 assemble_name (asm_out_file, name);
9912 putc ('\n', asm_out_file);
9913 ASM_OUTPUT_LABEL (asm_out_file, name);
9914 DECL_WEAK (decl) = 1;
9916 else
9917 #endif
9918 if (USE_HIDDEN_LINKONCE)
9920 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9922 targetm.asm_out.unique_section (decl, 0);
9923 switch_to_section (get_named_section (decl, NULL, 0));
9925 targetm.asm_out.globalize_label (asm_out_file, name);
9926 fputs ("\t.hidden\t", asm_out_file);
9927 assemble_name (asm_out_file, name);
9928 putc ('\n', asm_out_file);
9929 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9931 else
9933 switch_to_section (text_section);
9934 ASM_OUTPUT_LABEL (asm_out_file, name);
9937 DECL_INITIAL (decl) = make_node (BLOCK);
9938 current_function_decl = decl;
9939 init_function_start (decl);
9940 first_function_block_is_cold = false;
9941 /* Make sure unwind info is emitted for the thunk if needed. */
9942 final_start_function (emit_barrier (), asm_out_file, 1);
9944 /* Pad stack IP move with 4 instructions (two NOPs count
9945 as one instruction). */
9946 if (TARGET_PAD_SHORT_FUNCTION)
9948 int i = 8;
9950 while (i--)
9951 fputs ("\tnop\n", asm_out_file);
9954 xops[0] = gen_rtx_REG (Pmode, regno);
9955 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9956 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9957 output_asm_insn ("%!ret", NULL);
9958 final_end_function ();
9959 init_insn_lengths ();
9960 free_after_compilation (cfun);
9961 set_cfun (NULL);
9962 current_function_decl = NULL;
9965 if (flag_split_stack)
9966 file_end_indicate_split_stack ();
9969 /* Emit code for the SET_GOT patterns. */
9971 const char *
9972 output_set_got (rtx dest, rtx label)
9974 rtx xops[3];
9976 xops[0] = dest;
9978 if (TARGET_VXWORKS_RTP && flag_pic)
9980 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9981 xops[2] = gen_rtx_MEM (Pmode,
9982 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9983 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9985 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9986 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9987 an unadorned address. */
9988 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9989 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9990 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9991 return "";
9994 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9996 if (!flag_pic)
9998 if (TARGET_MACHO)
9999 /* We don't need a pic base, we're not producing pic. */
10000 gcc_unreachable ();
10002 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
10003 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
10004 targetm.asm_out.internal_label (asm_out_file, "L",
10005 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
10007 else
10009 char name[32];
10010 get_pc_thunk_name (name, REGNO (dest));
10011 pic_labels_used |= 1 << REGNO (dest);
10013 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
10014 xops[2] = gen_rtx_MEM (QImode, xops[2]);
10015 output_asm_insn ("%!call\t%X2", xops);
10017 #if TARGET_MACHO
10018 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
10019 This is what will be referenced by the Mach-O PIC subsystem. */
10020 if (machopic_should_output_picbase_label () || !label)
10021 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
10023 /* When we are restoring the pic base at the site of a nonlocal label,
10024 and we decided to emit the pic base above, we will still output a
10025 local label used for calculating the correction offset (even though
10026 the offset will be 0 in that case). */
10027 if (label)
10028 targetm.asm_out.internal_label (asm_out_file, "L",
10029 CODE_LABEL_NUMBER (label));
10030 #endif
10033 if (!TARGET_MACHO)
10034 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
10036 return "";
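/* On a typical 32-bit ELF target the PIC path above expands to something
   like (a sketch, AT&T syntax, %ebx as the PIC register):

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   with the thunk itself emitted by ix86_code_end above.  */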
10039 /* Generate a "push" pattern for input ARG. */
10041 static rtx
10042 gen_push (rtx arg)
10044 struct machine_function *m = cfun->machine;
10046 if (m->fs.cfa_reg == stack_pointer_rtx)
10047 m->fs.cfa_offset += UNITS_PER_WORD;
10048 m->fs.sp_offset += UNITS_PER_WORD;
10050 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10051 arg = gen_rtx_REG (word_mode, REGNO (arg));
10053 return gen_rtx_SET (gen_rtx_MEM (word_mode,
10054 gen_rtx_PRE_DEC (Pmode,
10055 stack_pointer_rtx)),
10056 arg);
10059 /* Generate a "pop" pattern for input ARG. */
10061 static rtx
10062 gen_pop (rtx arg)
10064 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10065 arg = gen_rtx_REG (word_mode, REGNO (arg));
10067 return gen_rtx_SET (arg,
10068 gen_rtx_MEM (word_mode,
10069 gen_rtx_POST_INC (Pmode,
10070 stack_pointer_rtx)));
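/* For word_mode == DImode these helpers produce RTL along the lines of
   (a sketch):

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))    ;; gen_push
       (set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))   ;; gen_pop

   i.e. the plain push/pop addressing forms the insn patterns expect.  */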
10073 /* Return >= 0 if there is an unused call-clobbered register available
10074 for the entire function. */
10076 static unsigned int
10077 ix86_select_alt_pic_regnum (void)
10079 if (ix86_use_pseudo_pic_reg ())
10080 return INVALID_REGNUM;
10082 if (crtl->is_leaf
10083 && !crtl->profile
10084 && !ix86_current_function_calls_tls_descriptor)
10086 int i, drap;
10087 /* Can't use the same register for both PIC and DRAP. */
10088 if (crtl->drap_reg)
10089 drap = REGNO (crtl->drap_reg);
10090 else
10091 drap = -1;
10092 for (i = 2; i >= 0; --i)
10093 if (i != drap && !df_regs_ever_live_p (i))
10094 return i;
10097 return INVALID_REGNUM;
10100 /* Return TRUE if we need to save REGNO. */
10102 static bool
10103 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
10105 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
10106 && pic_offset_table_rtx)
10108 if (ix86_use_pseudo_pic_reg ())
10110 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
10111 _mcount in prologue. */
10112 if (!TARGET_64BIT && flag_pic && crtl->profile)
10113 return true;
10115 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10116 || crtl->profile
10117 || crtl->calls_eh_return
10118 || crtl->uses_const_pool
10119 || cfun->has_nonlocal_label)
10120 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
10123 if (crtl->calls_eh_return && maybe_eh_return)
10125 unsigned i;
10126 for (i = 0; ; i++)
10128 unsigned test = EH_RETURN_DATA_REGNO (i);
10129 if (test == INVALID_REGNUM)
10130 break;
10131 if (test == regno)
10132 return true;
10136 if (crtl->drap_reg
10137 && regno == REGNO (crtl->drap_reg)
10138 && !cfun->machine->no_drap_save_restore)
10139 return true;
10141 return (df_regs_ever_live_p (regno)
10142 && !call_used_regs[regno]
10143 && !fixed_regs[regno]
10144 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10147 /* Return number of saved general purpose registers. */
10149 static int
10150 ix86_nsaved_regs (void)
10152 int nregs = 0;
10153 int regno;
10155 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10156 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10157 nregs ++;
10158 return nregs;
10161 /* Return number of saved SSE registers. */
10163 static int
10164 ix86_nsaved_sseregs (void)
10166 int nregs = 0;
10167 int regno;
10169 if (!TARGET_64BIT_MS_ABI)
10170 return 0;
10171 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10172 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10173 nregs ++;
10174 return nregs;
10177 /* Given FROM and TO register numbers, say whether this elimination is
10178 allowed. If stack alignment is needed, we can only replace argument
10179 pointer with hard frame pointer, or replace frame pointer with stack
10180 pointer. Otherwise, frame pointer elimination is automatically
10181 handled and all other eliminations are valid. */
10183 static bool
10184 ix86_can_eliminate (const int from, const int to)
10186 if (stack_realign_fp)
10187 return ((from == ARG_POINTER_REGNUM
10188 && to == HARD_FRAME_POINTER_REGNUM)
10189 || (from == FRAME_POINTER_REGNUM
10190 && to == STACK_POINTER_REGNUM));
10191 else
10192 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10195 /* Return the offset between two registers, one to be eliminated, and the other
10196 its replacement, at the start of a routine. */
10198 HOST_WIDE_INT
10199 ix86_initial_elimination_offset (int from, int to)
10201 struct ix86_frame frame;
10202 ix86_compute_frame_layout (&frame);
10204 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10205 return frame.hard_frame_pointer_offset;
10206 else if (from == FRAME_POINTER_REGNUM
10207 && to == HARD_FRAME_POINTER_REGNUM)
10208 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10209 else
10211 gcc_assert (to == STACK_POINTER_REGNUM);
10213 if (from == ARG_POINTER_REGNUM)
10214 return frame.stack_pointer_offset;
10216 gcc_assert (from == FRAME_POINTER_REGNUM);
10217 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10221 /* In a dynamically-aligned function, we can't know the offset from
10222 stack pointer to frame pointer, so we must ensure that setjmp
10223 eliminates fp against the hard fp (%ebp) rather than trying to
10224 index from %esp up to the top of the frame across a gap that is
10225 of unknown (at compile-time) size. */
10226 static rtx
10227 ix86_builtin_setjmp_frame_value (void)
10229 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10232 /* When using -fsplit-stack, the allocation routines set a field in
10233 the TCB to the bottom of the stack plus this much space, measured
10234 in bytes. */
10236 #define SPLIT_STACK_AVAILABLE 256
10238 /* Fill structure ix86_frame about frame of currently computed function. */
10240 static void
10241 ix86_compute_frame_layout (struct ix86_frame *frame)
10243 unsigned HOST_WIDE_INT stack_alignment_needed;
10244 HOST_WIDE_INT offset;
10245 unsigned HOST_WIDE_INT preferred_alignment;
10246 HOST_WIDE_INT size = get_frame_size ();
10247 HOST_WIDE_INT to_allocate;
10249 frame->nregs = ix86_nsaved_regs ();
10250 frame->nsseregs = ix86_nsaved_sseregs ();
10252 /* The 64-bit MS ABI seems to require stack alignment to be always 16,
10253 except for function prologues and leaf functions. */
10254 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10255 && (!crtl->is_leaf || cfun->calls_alloca != 0
10256 || ix86_current_function_calls_tls_descriptor))
10258 crtl->preferred_stack_boundary = 128;
10259 crtl->stack_alignment_needed = 128;
10261 /* preferred_stack_boundary is never updated for call
10262 expanded from tls descriptor. Update it here. We don't update it in
10263 expand stage because according to the comments before
10264 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10265 away. */
10266 else if (ix86_current_function_calls_tls_descriptor
10267 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10269 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10270 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10271 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10274 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10275 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10277 gcc_assert (!size || stack_alignment_needed);
10278 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10279 gcc_assert (preferred_alignment <= stack_alignment_needed);
10281 /* For SEH we have to limit the amount of code movement into the prologue.
10282 At present we do this via a BLOCKAGE, at which point there's very little
10283 scheduling that can be done, which means that there's very little point
10284 in doing anything except PUSHs. */
10285 if (TARGET_SEH)
10286 cfun->machine->use_fast_prologue_epilogue = false;
10288 /* During reload iteration the number of registers saved can change.
10289 Recompute the value as needed. Do not recompute when the number of registers
10290 didn't change, as reload makes multiple calls to this function and does not
10291 expect the decision to change within a single iteration. */
10292 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10293 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10295 int count = frame->nregs;
10296 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10298 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10300 /* The fast prologue uses move instead of push to save registers. This
10301 is significantly longer, but also executes faster as modern hardware
10302 can execute the moves in parallel, but can't do that for push/pop.
10304 Be careful about choosing what prologue to emit: when the function takes
10305 many instructions to execute we may use the slow version, as well as when
10306 the function is known to be outside a hot spot (this is known only with
10307 profile feedback). Weight the size of the function by the number of registers
10308 to save, as it is cheap to use one or two push instructions but very
10309 slow to use many of them. */
10310 if (count)
10311 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10312 if (node->frequency < NODE_FREQUENCY_NORMAL
10313 || (flag_branch_probabilities
10314 && node->frequency < NODE_FREQUENCY_HOT))
10315 cfun->machine->use_fast_prologue_epilogue = false;
10316 else
10317 cfun->machine->use_fast_prologue_epilogue
10318 = !expensive_function_p (count);
10321 frame->save_regs_using_mov
10322 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10323 /* If static stack checking is enabled and done with probes,
10324 the registers need to be saved before allocating the frame. */
10325 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10327 /* Skip return address. */
10328 offset = UNITS_PER_WORD;
10330 /* Skip pushed static chain. */
10331 if (ix86_static_chain_on_stack)
10332 offset += UNITS_PER_WORD;
10334 /* Skip saved base pointer. */
10335 if (frame_pointer_needed)
10336 offset += UNITS_PER_WORD;
10337 frame->hfp_save_offset = offset;
10339 /* The traditional frame pointer location is at the top of the frame. */
10340 frame->hard_frame_pointer_offset = offset;
10342 /* Register save area */
10343 offset += frame->nregs * UNITS_PER_WORD;
10344 frame->reg_save_offset = offset;
10346 /* On SEH target, registers are pushed just before the frame pointer
10347 location. */
10348 if (TARGET_SEH)
10349 frame->hard_frame_pointer_offset = offset;
10351 /* Align and set SSE register save area. */
10352 if (frame->nsseregs)
10354 /* The only ABI that has saved SSE registers (Win64) also has a
10355 16-byte aligned default stack, and thus we don't need to be
10356 within the re-aligned local stack frame to save them. */
10357 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10358 offset = (offset + 16 - 1) & -16;
10359 offset += frame->nsseregs * 16;
10361 frame->sse_reg_save_offset = offset;
10363 /* The re-aligned stack starts here. Values before this point are not
10364 directly comparable with values below this point. In order to make
10365 sure that no value happens to be the same before and after, force
10366 the alignment computation below to add a non-zero value. */
10367 if (stack_realign_fp)
10368 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10370 /* Va-arg area */
10371 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10372 offset += frame->va_arg_size;
10374 /* Align start of frame for local function. */
10375 if (stack_realign_fp
10376 || offset != frame->sse_reg_save_offset
10377 || size != 0
10378 || !crtl->is_leaf
10379 || cfun->calls_alloca
10380 || ix86_current_function_calls_tls_descriptor)
10381 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10383 /* Frame pointer points here. */
10384 frame->frame_pointer_offset = offset;
10386 offset += size;
10388 /* Add the outgoing arguments area. It can be skipped if we eliminated
10389 all the function calls as dead code.
10390 Skipping is however impossible when the function calls alloca, as the
10391 alloca expander assumes that the last crtl->outgoing_args_size bytes
10392 of the stack frame are unused. */
10393 if (ACCUMULATE_OUTGOING_ARGS
10394 && (!crtl->is_leaf || cfun->calls_alloca
10395 || ix86_current_function_calls_tls_descriptor))
10397 offset += crtl->outgoing_args_size;
10398 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10400 else
10401 frame->outgoing_arguments_size = 0;
10403 /* Align stack boundary. Only needed if we're calling another function
10404 or using alloca. */
10405 if (!crtl->is_leaf || cfun->calls_alloca
10406 || ix86_current_function_calls_tls_descriptor)
10407 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10409 /* We've reached end of stack frame. */
10410 frame->stack_pointer_offset = offset;
10412 /* Size prologue needs to allocate. */
10413 to_allocate = offset - frame->sse_reg_save_offset;
10415 if ((!to_allocate && frame->nregs <= 1)
10416 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10417 frame->save_regs_using_mov = false;
10419 if (ix86_using_red_zone ()
10420 && crtl->sp_is_unchanging
10421 && crtl->is_leaf
10422 && !ix86_current_function_calls_tls_descriptor)
10424 frame->red_zone_size = to_allocate;
10425 if (frame->save_regs_using_mov)
10426 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10427 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10428 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10430 else
10431 frame->red_zone_size = 0;
10432 frame->stack_pointer_offset -= frame->red_zone_size;
10434 /* The SEH frame pointer location is near the bottom of the frame.
10435 This is enforced by the fact that the difference between the
10436 stack pointer and the frame pointer is limited to 240 bytes in
10437 the unwind data structure. */
10438 if (TARGET_SEH)
10440 HOST_WIDE_INT diff;
10442 /* If we can leave the frame pointer where it is, do so. Also, returns
10443 the establisher frame for __builtin_frame_address (0). */
10444 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10445 if (diff <= SEH_MAX_FRAME_SIZE
10446 && (diff > 240 || (diff & 15) != 0)
10447 && !crtl->accesses_prior_frames)
10449 /* Ideally we'd determine what portion of the local stack frame
10450 (within the constraint of the lowest 240) is most heavily used.
10451 But without that complication, simply bias the frame pointer
10452 by 128 bytes so as to maximize the amount of the local stack
10453 frame that is addressable with 8-bit offsets. */
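/* With that bias, the hard frame pointer ends up 128 bytes above the
   final stack pointer, so roughly the bytes in [sp, sp + 255] stay
   reachable from it with a signed 8-bit displacement.  */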
10454 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10459 /* This is semi-inlined memory_address_length, but simplified
10460 since we know that we're always dealing with reg+offset, and
10461 to avoid having to create and discard all that rtl. */
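/* The length model below is a rough sketch of x86 addressing-mode sizes:
   a plain base register needs no displacement bytes, except that
   (%ebp)/(%r13) must be encoded with an explicit disp8 of zero; any
   displacement in [-128, 127] costs one byte and larger ones cost four;
   and using (%esp)/(%r12) as the base always needs an extra SIB byte.  */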
10463 static inline int
10464 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10466 int len = 4;
10468 if (offset == 0)
10470 /* EBP and R13 cannot be encoded without an offset. */
10471 len = (regno == BP_REG || regno == R13_REG);
10473 else if (IN_RANGE (offset, -128, 127))
10474 len = 1;
10476 /* ESP and R12 must be encoded with a SIB byte. */
10477 if (regno == SP_REG || regno == R12_REG)
10478 len++;
10480 return len;
10483 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10484 The valid base registers are taken from CFUN->MACHINE->FS. */
10486 static rtx
10487 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10489 const struct machine_function *m = cfun->machine;
10490 rtx base_reg = NULL;
10491 HOST_WIDE_INT base_offset = 0;
10493 if (m->use_fast_prologue_epilogue)
10495 /* Choose the base register most likely to allow the most scheduling
10496 opportunities. Generally FP is valid throughout the function,
10497 while DRAP must be reloaded within the epilogue. But choose either
10498 over the SP due to increased encoding size. */
10500 if (m->fs.fp_valid)
10502 base_reg = hard_frame_pointer_rtx;
10503 base_offset = m->fs.fp_offset - cfa_offset;
10505 else if (m->fs.drap_valid)
10507 base_reg = crtl->drap_reg;
10508 base_offset = 0 - cfa_offset;
10510 else if (m->fs.sp_valid)
10512 base_reg = stack_pointer_rtx;
10513 base_offset = m->fs.sp_offset - cfa_offset;
10516 else
10518 HOST_WIDE_INT toffset;
10519 int len = 16, tlen;
10521 /* Choose the base register with the smallest address encoding.
10522 With a tie, choose FP > DRAP > SP. */
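/* Ties are broken by the order of the tests below: a later candidate
   replaces an earlier one when its encoding is no longer (tlen <= len),
   so with equal lengths FP wins over DRAP, which wins over SP.  */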
10523 if (m->fs.sp_valid)
10525 base_reg = stack_pointer_rtx;
10526 base_offset = m->fs.sp_offset - cfa_offset;
10527 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10529 if (m->fs.drap_valid)
10531 toffset = 0 - cfa_offset;
10532 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10533 if (tlen <= len)
10535 base_reg = crtl->drap_reg;
10536 base_offset = toffset;
10537 len = tlen;
10540 if (m->fs.fp_valid)
10542 toffset = m->fs.fp_offset - cfa_offset;
10543 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10544 if (tlen <= len)
10546 base_reg = hard_frame_pointer_rtx;
10547 base_offset = toffset;
10548 len = tlen;
10552 gcc_assert (base_reg != NULL);
10554 return plus_constant (Pmode, base_reg, base_offset);
10557 /* Emit code to save registers in the prologue. */
10559 static void
10560 ix86_emit_save_regs (void)
10562 unsigned int regno;
10563 rtx_insn *insn;
10565 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10566 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10568 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10569 RTX_FRAME_RELATED_P (insn) = 1;
10573 /* Emit a single register save at CFA - CFA_OFFSET. */
10575 static void
10576 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10577 HOST_WIDE_INT cfa_offset)
10579 struct machine_function *m = cfun->machine;
10580 rtx reg = gen_rtx_REG (mode, regno);
10581 rtx mem, addr, base, insn;
10583 addr = choose_baseaddr (cfa_offset);
10584 mem = gen_frame_mem (mode, addr);
10586 /* For SSE saves, we need to indicate the 128-bit alignment. */
10587 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10589 insn = emit_move_insn (mem, reg);
10590 RTX_FRAME_RELATED_P (insn) = 1;
10592 base = addr;
10593 if (GET_CODE (base) == PLUS)
10594 base = XEXP (base, 0);
10595 gcc_checking_assert (REG_P (base));
10597 /* When saving registers into a re-aligned local stack frame, avoid
10598 any tricky guessing by dwarf2out. */
10599 if (m->fs.realigned)
10601 gcc_checking_assert (stack_realign_drap);
10603 if (regno == REGNO (crtl->drap_reg))
10605 /* A bit of a hack. We force the DRAP register to be saved in
10606 the re-aligned stack frame, which provides us with a copy
10607 of the CFA that will last past the prologue. Install it. */
10608 gcc_checking_assert (cfun->machine->fs.fp_valid);
10609 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10610 cfun->machine->fs.fp_offset - cfa_offset);
10611 mem = gen_rtx_MEM (mode, addr);
10612 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10614 else
10616 /* The frame pointer is a stable reference within the
10617 aligned frame. Use it. */
10618 gcc_checking_assert (cfun->machine->fs.fp_valid);
10619 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10620 cfun->machine->fs.fp_offset - cfa_offset);
10621 mem = gen_rtx_MEM (mode, addr);
10622 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
10626 /* The memory may not be relative to the current CFA register,
10627 which means that we may need to generate a new pattern for
10628 use by the unwind info. */
10629 else if (base != m->fs.cfa_reg)
10631 addr = plus_constant (Pmode, m->fs.cfa_reg,
10632 m->fs.cfa_offset - cfa_offset);
10633 mem = gen_rtx_MEM (mode, addr);
10634 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
10638 /* Emit code to save registers using MOV insns.
10639 First register is stored at CFA - CFA_OFFSET. */
10640 static void
10641 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10643 unsigned int regno;
10645 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10646 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10648 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10649 cfa_offset -= UNITS_PER_WORD;
10653 /* Emit code to save SSE registers using MOV insns.
10654 First register is stored at CFA - CFA_OFFSET. */
10655 static void
10656 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10658 unsigned int regno;
10660 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10661 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10663 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10664 cfa_offset -= 16;
10668 static GTY(()) rtx queued_cfa_restores;
10670 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10671 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10672 Don't add the note if the previously saved value will be left untouched
10673 within the stack red zone until return, as unwinders can find the same value
10674 in the register and on the stack. */
10676 static void
10677 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10679 if (!crtl->shrink_wrapped
10680 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10681 return;
10683 if (insn)
10685 add_reg_note (insn, REG_CFA_RESTORE, reg);
10686 RTX_FRAME_RELATED_P (insn) = 1;
10688 else
10689 queued_cfa_restores
10690 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10693 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10695 static void
10696 ix86_add_queued_cfa_restore_notes (rtx insn)
10698 rtx last;
10699 if (!queued_cfa_restores)
10700 return;
10701 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10703 XEXP (last, 1) = REG_NOTES (insn);
10704 REG_NOTES (insn) = queued_cfa_restores;
10705 queued_cfa_restores = NULL_RTX;
10706 RTX_FRAME_RELATED_P (insn) = 1;
10709 /* Expand prologue or epilogue stack adjustment.
10710 The pattern exists to put a dependency on all ebp-based memory accesses.
10711 STYLE should be negative if instructions should be marked as frame related,
10712 zero if %r11 register is live and cannot be freely used and positive
10713 otherwise. */
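/* For example, prologue callers pass STYLE == -1 so the adjustment itself
   is marked frame related, while epilogue paths pass a nonnegative STYLE,
   in which case any queued REG_CFA_RESTORE notes are attached to the
   adjustment instead.  */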
10715 static void
10716 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10717 int style, bool set_cfa)
10719 struct machine_function *m = cfun->machine;
10720 rtx insn;
10721 bool add_frame_related_expr = false;
10723 if (Pmode == SImode)
10724 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10725 else if (x86_64_immediate_operand (offset, DImode))
10726 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10727 else
10729 rtx tmp;
10730 /* r11 is used by indirect sibcall return as well, set before the
10731 epilogue and used after the epilogue. */
10732 if (style)
10733 tmp = gen_rtx_REG (DImode, R11_REG);
10734 else
10736 gcc_assert (src != hard_frame_pointer_rtx
10737 && dest != hard_frame_pointer_rtx);
10738 tmp = hard_frame_pointer_rtx;
10740 insn = emit_insn (gen_rtx_SET (tmp, offset));
10741 if (style < 0)
10742 add_frame_related_expr = true;
10744 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10747 insn = emit_insn (insn);
10748 if (style >= 0)
10749 ix86_add_queued_cfa_restore_notes (insn);
10751 if (set_cfa)
10753 rtx r;
10755 gcc_assert (m->fs.cfa_reg == src);
10756 m->fs.cfa_offset += INTVAL (offset);
10757 m->fs.cfa_reg = dest;
10759 r = gen_rtx_PLUS (Pmode, src, offset);
10760 r = gen_rtx_SET (dest, r);
10761 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10762 RTX_FRAME_RELATED_P (insn) = 1;
10764 else if (style < 0)
10766 RTX_FRAME_RELATED_P (insn) = 1;
10767 if (add_frame_related_expr)
10769 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10770 r = gen_rtx_SET (dest, r);
10771 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10775 if (dest == stack_pointer_rtx)
10777 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10778 bool valid = m->fs.sp_valid;
10780 if (src == hard_frame_pointer_rtx)
10782 valid = m->fs.fp_valid;
10783 ooffset = m->fs.fp_offset;
10785 else if (src == crtl->drap_reg)
10787 valid = m->fs.drap_valid;
10788 ooffset = 0;
10790 else
10792 /* Else there are two possibilities: SP itself, which we set
10793 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10794 taken care of by hand along the eh_return path. */
10795 gcc_checking_assert (src == stack_pointer_rtx
10796 || offset == const0_rtx);
10799 m->fs.sp_offset = ooffset - INTVAL (offset);
10800 m->fs.sp_valid = valid;
10804 /* Find an available register to be used as dynamic realign argument
10805 pointer register. Such a register will be written in the prologue and
10806 used at the beginning of the body, so it must not be
10807 1. parameter passing register.
10808 2. GOT pointer.
10809 We reuse the static-chain register if it is available. Otherwise, we
10810 use DI for i386 and R13 for x86-64. We chose R13 since it has
10811 shorter encoding.
10813 Return: the regno of chosen register. */
10815 static unsigned int
10816 find_drap_reg (void)
10818 tree decl = cfun->decl;
10820 if (TARGET_64BIT)
10822 /* Use R13 for a nested function or a function that needs a static
10823 chain. Since a function with a tail call may use any caller-saved
10824 register in the epilogue, DRAP must not use a caller-saved
10825 register in such a case. */
10826 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10827 return R13_REG;
10829 return R10_REG;
10831 else
10833 /* Use DI for a nested function or a function that needs a static
10834 chain. Since a function with a tail call may use any caller-saved
10835 register in the epilogue, DRAP must not use a caller-saved
10836 register in such a case. */
10837 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10838 return DI_REG;
10840 /* Reuse static chain register if it isn't used for parameter
10841 passing. */
10842 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10844 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10845 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10846 return CX_REG;
10848 return DI_REG;
10852 /* Return minimum incoming stack alignment. */
10854 static unsigned int
10855 ix86_minimum_incoming_stack_boundary (bool sibcall)
10857 unsigned int incoming_stack_boundary;
10859 /* Prefer the one specified at command line. */
10860 if (ix86_user_incoming_stack_boundary)
10861 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10862 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10863 when -mstackrealign is used, this isn't a sibcall check, and the
10864 estimated stack alignment is 128 bits. */
10865 else if (!sibcall
10866 && !TARGET_64BIT
10867 && ix86_force_align_arg_pointer
10868 && crtl->stack_alignment_estimated == 128)
10869 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10870 else
10871 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10873 /* Incoming stack alignment can be changed on individual functions
10874 via force_align_arg_pointer attribute. We use the smallest
10875 incoming stack boundary. */
10876 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10877 && lookup_attribute (ix86_force_align_arg_pointer_string,
10878 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10879 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10881 /* The incoming stack frame has to be aligned at least at
10882 parm_stack_boundary. */
10883 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10884 incoming_stack_boundary = crtl->parm_stack_boundary;
10886 /* The stack at the entrance of main is aligned by the runtime. We use the
10887 smallest incoming stack boundary. */
10888 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10889 && DECL_NAME (current_function_decl)
10890 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10891 && DECL_FILE_SCOPE_P (current_function_decl))
10892 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10894 return incoming_stack_boundary;
10897 /* Update incoming stack boundary and estimated stack alignment. */
10899 static void
10900 ix86_update_stack_boundary (void)
10902 ix86_incoming_stack_boundary
10903 = ix86_minimum_incoming_stack_boundary (false);
10905 /* x86_64 varargs need 16-byte stack alignment for the register save
10906 area. */
10907 if (TARGET_64BIT
10908 && cfun->stdarg
10909 && crtl->stack_alignment_estimated < 128)
10910 crtl->stack_alignment_estimated = 128;
10913 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10914 needed or an rtx for DRAP otherwise. */
10916 static rtx
10917 ix86_get_drap_rtx (void)
10919 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10920 crtl->need_drap = true;
10922 if (stack_realign_drap)
10924 /* Assign DRAP to vDRAP and return vDRAP. */
10925 unsigned int regno = find_drap_reg ();
10926 rtx drap_vreg;
10927 rtx arg_ptr;
10928 rtx_insn *seq, *insn;
10930 arg_ptr = gen_rtx_REG (Pmode, regno);
10931 crtl->drap_reg = arg_ptr;
10933 start_sequence ();
10934 drap_vreg = copy_to_reg (arg_ptr);
10935 seq = get_insns ();
10936 end_sequence ();
10938 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10939 if (!optimize)
10941 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10942 RTX_FRAME_RELATED_P (insn) = 1;
10944 return drap_vreg;
10946 else
10947 return NULL;
10950 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10952 static rtx
10953 ix86_internal_arg_pointer (void)
10955 return virtual_incoming_args_rtx;
10958 struct scratch_reg {
10959 rtx reg;
10960 bool saved;
10963 /* Return a short-lived scratch register for use on function entry.
10964 In 32-bit mode, it is valid only after the registers are saved
10965 in the prologue. This register must be released by means of
10966 release_scratch_register_on_entry once it is dead. */
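/* Roughly: r11 in 64-bit mode; in 32-bit mode, prefer a call-clobbered
   register that is not needed for argument passing, the static chain or
   DRAP, then fall back to a call-saved register that is being saved
   anyway, and finally push/pop eax or edx around the use.  */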
10968 static void
10969 get_scratch_register_on_entry (struct scratch_reg *sr)
10971 int regno;
10973 sr->saved = false;
10975 if (TARGET_64BIT)
10977 /* We always use R11 in 64-bit mode. */
10978 regno = R11_REG;
10980 else
10982 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10983 bool fastcall_p
10984 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10985 bool thiscall_p
10986 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10987 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10988 int regparm = ix86_function_regparm (fntype, decl);
10989 int drap_regno
10990 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10992 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10993 for the static chain register. */
10994 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10995 && drap_regno != AX_REG)
10996 regno = AX_REG;
10997 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10998 for the static chain register. */
10999 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
11000 regno = AX_REG;
11001 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
11002 regno = DX_REG;
11003 /* ecx is the static chain register. */
11004 else if (regparm < 3 && !fastcall_p && !thiscall_p
11005 && !static_chain_p
11006 && drap_regno != CX_REG)
11007 regno = CX_REG;
11008 else if (ix86_save_reg (BX_REG, true))
11009 regno = BX_REG;
11010 /* esi is the static chain register. */
11011 else if (!(regparm == 3 && static_chain_p)
11012 && ix86_save_reg (SI_REG, true))
11013 regno = SI_REG;
11014 else if (ix86_save_reg (DI_REG, true))
11015 regno = DI_REG;
11016 else
11018 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
11019 sr->saved = true;
11023 sr->reg = gen_rtx_REG (Pmode, regno);
11024 if (sr->saved)
11026 rtx_insn *insn = emit_insn (gen_push (sr->reg));
11027 RTX_FRAME_RELATED_P (insn) = 1;
11031 /* Release a scratch register obtained from the preceding function. */
11033 static void
11034 release_scratch_register_on_entry (struct scratch_reg *sr)
11036 if (sr->saved)
11038 struct machine_function *m = cfun->machine;
11039 rtx x, insn = emit_insn (gen_pop (sr->reg));
11041 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
11042 RTX_FRAME_RELATED_P (insn) = 1;
11043 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
11044 x = gen_rtx_SET (stack_pointer_rtx, x);
11045 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
11046 m->fs.sp_offset -= UNITS_PER_WORD;
11050 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
11052 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
11054 static void
11055 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
11057 /* We skip the probe for the first interval + a small dope of 4 words and
11058 probe that many bytes past the specified size to maintain a protection
11059 area at the bottom of the stack. */
11060 const int dope = 4 * UNITS_PER_WORD;
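/* E.g. with 8-byte words the dope is 32 bytes (16 with 4-byte words), on
   top of the probe interval, which is typically 4 KiB.  */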
11061 rtx size_rtx = GEN_INT (size), last;
11063 /* See if we have a constant small number of probes to generate. If so,
11064 that's the easy case. The run-time loop is made up of 11 insns in the
11065 generic case while the compile-time loop is made up of 3+2*(n-1) insns
11066 for n # of intervals. */
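/* E.g. at the 5-interval cutoff used below, the unrolled form costs about
   3 + 2*4 = 11 insns, roughly the break-even point with the loop.  */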
11067 if (size <= 5 * PROBE_INTERVAL)
11069 HOST_WIDE_INT i, adjust;
11070 bool first_probe = true;
11072 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
11073 values of N from 1 until it exceeds SIZE. If only one probe is
11074 needed, this will not generate any code. Then adjust and probe
11075 to PROBE_INTERVAL + SIZE. */
11076 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11078 if (first_probe)
11080 adjust = 2 * PROBE_INTERVAL + dope;
11081 first_probe = false;
11083 else
11084 adjust = PROBE_INTERVAL;
11086 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11087 plus_constant (Pmode, stack_pointer_rtx,
11088 -adjust)));
11089 emit_stack_probe (stack_pointer_rtx);
11092 if (first_probe)
11093 adjust = size + PROBE_INTERVAL + dope;
11094 else
11095 adjust = size + PROBE_INTERVAL - i;
11097 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11098 plus_constant (Pmode, stack_pointer_rtx,
11099 -adjust)));
11100 emit_stack_probe (stack_pointer_rtx);
11102 /* Adjust back to account for the additional first interval. */
11103 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11104 plus_constant (Pmode, stack_pointer_rtx,
11105 PROBE_INTERVAL + dope)));
11108 /* Otherwise, do the same as above, but in a loop. Note that we must be
11109 extra careful with variables wrapping around because we might be at
11110 the very top (or the very bottom) of the address space and we have
11111 to be able to handle this case properly; in particular, we use an
11112 equality test for the loop condition. */
11113 else
11115 HOST_WIDE_INT rounded_size;
11116 struct scratch_reg sr;
11118 get_scratch_register_on_entry (&sr);
11121 /* Step 1: round SIZE to the previous multiple of the interval. */
11123 rounded_size = size & -PROBE_INTERVAL;
11126 /* Step 2: compute initial and final value of the loop counter. */
11128 /* SP = SP_0 + PROBE_INTERVAL. */
11129 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11130 plus_constant (Pmode, stack_pointer_rtx,
11131 - (PROBE_INTERVAL + dope))));
11133 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
11134 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
11135 emit_insn (gen_rtx_SET (sr.reg,
11136 gen_rtx_PLUS (Pmode, sr.reg,
11137 stack_pointer_rtx)));
11140 /* Step 3: the loop
11142 while (SP != LAST_ADDR)
11144 SP = SP + PROBE_INTERVAL
11145 probe at SP
11148 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11149 values of N from 1 until it is equal to ROUNDED_SIZE. */
11151 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11154 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11155 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
11157 if (size != rounded_size)
11159 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11160 plus_constant (Pmode, stack_pointer_rtx,
11161 rounded_size - size)));
11162 emit_stack_probe (stack_pointer_rtx);
11165 /* Adjust back to account for the additional first interval. */
11166 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11167 plus_constant (Pmode, stack_pointer_rtx,
11168 PROBE_INTERVAL + dope)));
11170 release_scratch_register_on_entry (&sr);
11173 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11175 /* Even if the stack pointer isn't the CFA register, we need to correctly
11176 describe the adjustments made to it, in particular differentiate the
11177 frame-related ones from the frame-unrelated ones. */
11178 if (size > 0)
11180 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11181 XVECEXP (expr, 0, 0)
11182 = gen_rtx_SET (stack_pointer_rtx,
11183 plus_constant (Pmode, stack_pointer_rtx, -size));
11184 XVECEXP (expr, 0, 1)
11185 = gen_rtx_SET (stack_pointer_rtx,
11186 plus_constant (Pmode, stack_pointer_rtx,
11187 PROBE_INTERVAL + dope + size));
11188 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11189 RTX_FRAME_RELATED_P (last) = 1;
11191 cfun->machine->fs.sp_offset += size;
11194 /* Make sure nothing is scheduled before we are done. */
11195 emit_insn (gen_blockage ());
11198 /* Adjust the stack pointer up to REG while probing it. */
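/* Assuming the usual 4 KiB probe interval and the r11 scratch register,
   the emitted loop looks roughly like (AT&T syntax):

	.LPSRL0:	cmpq	%r11, %rsp
			je	.LPSRE0
			subq	$4096, %rsp
			orq	$0, (%rsp)
			jmp	.LPSRL0
	.LPSRE0:						*/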
11200 const char *
11201 output_adjust_stack_and_probe (rtx reg)
11203 static int labelno = 0;
11204 char loop_lab[32], end_lab[32];
11205 rtx xops[2];
11207 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11208 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11210 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11212 /* Jump to END_LAB if SP == LAST_ADDR. */
11213 xops[0] = stack_pointer_rtx;
11214 xops[1] = reg;
11215 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11216 fputs ("\tje\t", asm_out_file);
11217 assemble_name_raw (asm_out_file, end_lab);
11218 fputc ('\n', asm_out_file);
11220 /* SP = SP + PROBE_INTERVAL. */
11221 xops[1] = GEN_INT (PROBE_INTERVAL);
11222 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11224 /* Probe at SP. */
11225 xops[1] = const0_rtx;
11226 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11228 fprintf (asm_out_file, "\tjmp\t");
11229 assemble_name_raw (asm_out_file, loop_lab);
11230 fputc ('\n', asm_out_file);
11232 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11234 return "";
11237 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11238 inclusive. These are offsets from the current stack pointer. */
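/* For example, with FIRST == STACK_CHECK_PROTECT and the typical 4 KiB
   interval, a 10 KiB range is probed at sp - (FIRST + 4K),
   sp - (FIRST + 8K) and finally sp - (FIRST + 10K).  */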
11240 static void
11241 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11243 /* See if we have a constant small number of probes to generate. If so,
11244 that's the easy case. The run-time loop is made up of 7 insns in the
11245 generic case while the compile-time loop is made up of n insns for n #
11246 of intervals. */
11247 if (size <= 7 * PROBE_INTERVAL)
11249 HOST_WIDE_INT i;
11251 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11252 it exceeds SIZE. If only one probe is needed, this will not
11253 generate any code. Then probe at FIRST + SIZE. */
11254 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11255 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11256 -(first + i)));
11258 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11259 -(first + size)));
11262 /* Otherwise, do the same as above, but in a loop. Note that we must be
11263 extra careful with variables wrapping around because we might be at
11264 the very top (or the very bottom) of the address space and we have
11265 to be able to handle this case properly; in particular, we use an
11266 equality test for the loop condition. */
11267 else
11269 HOST_WIDE_INT rounded_size, last;
11270 struct scratch_reg sr;
11272 get_scratch_register_on_entry (&sr);
11275 /* Step 1: round SIZE to the previous multiple of the interval. */
11277 rounded_size = size & -PROBE_INTERVAL;
11280 /* Step 2: compute initial and final value of the loop counter. */
11282 /* TEST_OFFSET = FIRST. */
11283 emit_move_insn (sr.reg, GEN_INT (-first));
11285 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11286 last = first + rounded_size;
11289 /* Step 3: the loop
11291 while (TEST_ADDR != LAST_ADDR)
11293 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11294 probe at TEST_ADDR
11297 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11298 until it is equal to ROUNDED_SIZE. */
11300 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11303 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11304 that SIZE is equal to ROUNDED_SIZE. */
11306 if (size != rounded_size)
11307 emit_stack_probe (plus_constant (Pmode,
11308 gen_rtx_PLUS (Pmode,
11309 stack_pointer_rtx,
11310 sr.reg),
11311 rounded_size - size));
11313 release_scratch_register_on_entry (&sr);
11316 /* Make sure nothing is scheduled before we are done. */
11317 emit_insn (gen_blockage ());
11320 /* Probe a range of stack addresses from REG to END, inclusive. These are
11321 offsets from the current stack pointer. */
11323 const char *
11324 output_probe_stack_range (rtx reg, rtx end)
11326 static int labelno = 0;
11327 char loop_lab[32], end_lab[32];
11328 rtx xops[3];
11330 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11331 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11333 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11335 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11336 xops[0] = reg;
11337 xops[1] = end;
11338 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11339 fputs ("\tje\t", asm_out_file);
11340 assemble_name_raw (asm_out_file, end_lab);
11341 fputc ('\n', asm_out_file);
11343 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11344 xops[1] = GEN_INT (PROBE_INTERVAL);
11345 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11347 /* Probe at TEST_ADDR. */
11348 xops[0] = stack_pointer_rtx;
11349 xops[1] = reg;
11350 xops[2] = const0_rtx;
11351 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11353 fprintf (asm_out_file, "\tjmp\t");
11354 assemble_name_raw (asm_out_file, loop_lab);
11355 fputc ('\n', asm_out_file);
11357 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11359 return "";
11362 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
11363 to be generated in the correct form. */
11364 static void
11365 ix86_finalize_stack_realign_flags (void)
11367 /* Check if stack realignment is really needed after reload, and
11368 store the result in cfun. */
11369 unsigned int incoming_stack_boundary
11370 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11371 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11372 unsigned int stack_realign = (incoming_stack_boundary
11373 < (crtl->is_leaf
11374 ? crtl->max_used_stack_slot_alignment
11375 : crtl->stack_alignment_needed));
11377 if (crtl->stack_realign_finalized)
11379 /* After stack_realign_needed is finalized, we can no longer
11380 change it. */
11381 gcc_assert (crtl->stack_realign_needed == stack_realign);
11382 return;
11385 /* If the only reason for frame_pointer_needed is that we conservatively
11386 assumed stack realignment might be needed, but in the end nothing that
11387 needed the stack alignment had been spilled, clear frame_pointer_needed
11388 and say we don't need stack realignment. */
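/* Roughly, this catches a leaf function whose frame is empty and which
   only touches registers: it can then run directly on the incoming
   (possibly less aligned) stack with no frame pointer at all.  */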
11389 if (stack_realign
11390 && frame_pointer_needed
11391 && crtl->is_leaf
11392 && flag_omit_frame_pointer
11393 && crtl->sp_is_unchanging
11394 && !ix86_current_function_calls_tls_descriptor
11395 && !crtl->accesses_prior_frames
11396 && !cfun->calls_alloca
11397 && !crtl->calls_eh_return
11398 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11399 && !ix86_frame_pointer_required ()
11400 && get_frame_size () == 0
11401 && ix86_nsaved_sseregs () == 0
11402 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11404 HARD_REG_SET set_up_by_prologue, prologue_used;
11405 basic_block bb;
11407 CLEAR_HARD_REG_SET (prologue_used);
11408 CLEAR_HARD_REG_SET (set_up_by_prologue);
11409 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11410 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11411 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11412 HARD_FRAME_POINTER_REGNUM);
11413 FOR_EACH_BB_FN (bb, cfun)
11415 rtx_insn *insn;
11416 FOR_BB_INSNS (bb, insn)
11417 if (NONDEBUG_INSN_P (insn)
11418 && requires_stack_frame_p (insn, prologue_used,
11419 set_up_by_prologue))
11421 crtl->stack_realign_needed = stack_realign;
11422 crtl->stack_realign_finalized = true;
11423 return;
11427 /* If drap has been set, but it actually isn't live at the start
11428 of the function, there is no reason to set it up. */
11429 if (crtl->drap_reg)
11431 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11432 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11434 crtl->drap_reg = NULL_RTX;
11435 crtl->need_drap = false;
11438 else
11439 cfun->machine->no_drap_save_restore = true;
11441 frame_pointer_needed = false;
11442 stack_realign = false;
11443 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11444 crtl->stack_alignment_needed = incoming_stack_boundary;
11445 crtl->stack_alignment_estimated = incoming_stack_boundary;
11446 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11447 crtl->preferred_stack_boundary = incoming_stack_boundary;
11448 df_finish_pass (true);
11449 df_scan_alloc (NULL);
11450 df_scan_blocks ();
11451 df_compute_regs_ever_live (true);
11452 df_analyze ();
11455 crtl->stack_realign_needed = stack_realign;
11456 crtl->stack_realign_finalized = true;
11461 /* Delete SET_GOT right after the entry block if it is allocated to REG. */
11461 static void
11462 ix86_elim_entry_set_got (rtx reg)
11464 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11465 rtx_insn *c_insn = BB_HEAD (bb);
11466 if (!NONDEBUG_INSN_P (c_insn))
11467 c_insn = next_nonnote_nondebug_insn (c_insn);
11468 if (c_insn && NONJUMP_INSN_P (c_insn))
11470 rtx pat = PATTERN (c_insn);
11471 if (GET_CODE (pat) == PARALLEL)
11473 rtx vec = XVECEXP (pat, 0, 0);
11474 if (GET_CODE (vec) == SET
11475 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11476 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11477 delete_insn (c_insn);
11482 /* Expand the prologue into a bunch of separate insns. */
11484 void
11485 ix86_expand_prologue (void)
11487 struct machine_function *m = cfun->machine;
11488 rtx insn, t;
11489 struct ix86_frame frame;
11490 HOST_WIDE_INT allocate;
11491 bool int_registers_saved;
11492 bool sse_registers_saved;
11494 ix86_finalize_stack_realign_flags ();
11496 /* DRAP should not coexist with stack_realign_fp */
11497 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11499 memset (&m->fs, 0, sizeof (m->fs));
11501 /* Initialize CFA state for before the prologue. */
11502 m->fs.cfa_reg = stack_pointer_rtx;
11503 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11505 /* Track SP offset to the CFA. We continue tracking this after we've
11506 swapped the CFA register away from SP. In the case of re-alignment
11507 this is fudged; we're interested in offsets within the local frame. */
11508 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11509 m->fs.sp_valid = true;
11511 ix86_compute_frame_layout (&frame);
11513 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11515 /* We should have already generated an error for any use of
11516 ms_hook on a nested function. */
11517 gcc_checking_assert (!ix86_static_chain_on_stack);
11519 /* Check if profiling is active and we shall use the profiling-before-
11520 prologue variant. If so, sorry. */
11521 if (crtl->profile && flag_fentry != 0)
11522 sorry ("ms_hook_prologue attribute isn%'t compatible "
11523 "with -mfentry for 32-bit");
11525 /* In ix86_asm_output_function_label we emitted:
11526 8b ff movl.s %edi,%edi
11527 55 push %ebp
11528 8b ec movl.s %esp,%ebp
11530 This matches the hookable function prologue in Win32 API
11531 functions in Microsoft Windows XP Service Pack 2 and newer.
11532 Wine uses this to enable Windows apps to hook the Win32 API
11533 functions provided by Wine.
11535 What that means is that we've already set up the frame pointer. */
11537 if (frame_pointer_needed
11538 && !(crtl->drap_reg && crtl->stack_realign_needed))
11540 rtx push, mov;
11542 /* We've decided to use the frame pointer already set up.
11543 Describe this to the unwinder by pretending that both
11544 push and mov insns happen right here.
11546 Putting the unwind info here at the end of the ms_hook
11547 is done so that we can make absolutely certain we get
11548 the required byte sequence at the start of the function,
11549 rather than relying on an assembler that can produce
11550 the exact encoding required.
11552 However it does mean (in the unpatched case) that we have
11553 a 1 insn window where the asynchronous unwind info is
11554 incorrect. However, if we placed the unwind info at
11555 its correct location we would have incorrect unwind info
11556 in the patched case. Which is probably all moot since
11557 I don't expect Wine generates dwarf2 unwind info for the
11558 system libraries that use this feature. */
11560 insn = emit_insn (gen_blockage ());
11562 push = gen_push (hard_frame_pointer_rtx);
11563 mov = gen_rtx_SET (hard_frame_pointer_rtx,
11564 stack_pointer_rtx);
11565 RTX_FRAME_RELATED_P (push) = 1;
11566 RTX_FRAME_RELATED_P (mov) = 1;
11568 RTX_FRAME_RELATED_P (insn) = 1;
11569 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11570 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11572 /* Note that gen_push incremented m->fs.cfa_offset, even
11573 though we didn't emit the push insn here. */
11574 m->fs.cfa_reg = hard_frame_pointer_rtx;
11575 m->fs.fp_offset = m->fs.cfa_offset;
11576 m->fs.fp_valid = true;
11578 else
11580 /* The frame pointer is not needed so pop %ebp again.
11581 This leaves us with a pristine state. */
11582 emit_insn (gen_pop (hard_frame_pointer_rtx));
11586 /* The first insn of a function that accepts its static chain on the
11587 stack is to push the register that would be filled in by a direct
11588 call. This insn will be skipped by the trampoline. */
11589 else if (ix86_static_chain_on_stack)
11591 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11592 emit_insn (gen_blockage ());
11594 /* We don't want to interpret this push insn as a register save,
11595 only as a stack adjustment. The real copy of the register as
11596 a save will be done later, if needed. */
11597 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11598 t = gen_rtx_SET (stack_pointer_rtx, t);
11599 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11600 RTX_FRAME_RELATED_P (insn) = 1;
11603 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11604 DRAP is needed and stack realignment is really needed after reload. */
11605 if (stack_realign_drap)
11607 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11609 /* Only need to push parameter pointer reg if it is caller saved. */
11610 if (!call_used_regs[REGNO (crtl->drap_reg)])
11612 /* Push arg pointer reg */
11613 insn = emit_insn (gen_push (crtl->drap_reg));
11614 RTX_FRAME_RELATED_P (insn) = 1;
11617 /* Grab the argument pointer. */
11618 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11619 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11620 RTX_FRAME_RELATED_P (insn) = 1;
11621 m->fs.cfa_reg = crtl->drap_reg;
11622 m->fs.cfa_offset = 0;
11624 /* Align the stack. */
11625 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11626 stack_pointer_rtx,
11627 GEN_INT (-align_bytes)));
11628 RTX_FRAME_RELATED_P (insn) = 1;
11630 /* Replicate the return address on the stack so that the return
11631 address can be reached via the (argp - 1) slot. This is needed
11632 to implement macro RETURN_ADDR_RTX and intrinsic function
11633 expand_builtin_return_addr etc. */
11634 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11635 t = gen_frame_mem (word_mode, t);
11636 insn = emit_insn (gen_push (t));
11637 RTX_FRAME_RELATED_P (insn) = 1;
11639 /* For the purposes of frame and register save area addressing,
11640 we've started over with a new frame. */
11641 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11642 m->fs.realigned = true;
11645 int_registers_saved = (frame.nregs == 0);
11646 sse_registers_saved = (frame.nsseregs == 0);
11648 if (frame_pointer_needed && !m->fs.fp_valid)
11650 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11651 slower on all targets. Also sdb doesn't like it. */
11652 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11653 RTX_FRAME_RELATED_P (insn) = 1;
11655 /* Push registers now, before setting the frame pointer
11656 on SEH target. */
11657 if (!int_registers_saved
11658 && TARGET_SEH
11659 && !frame.save_regs_using_mov)
11661 ix86_emit_save_regs ();
11662 int_registers_saved = true;
11663 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11666 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11668 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11669 RTX_FRAME_RELATED_P (insn) = 1;
11671 if (m->fs.cfa_reg == stack_pointer_rtx)
11672 m->fs.cfa_reg = hard_frame_pointer_rtx;
11673 m->fs.fp_offset = m->fs.sp_offset;
11674 m->fs.fp_valid = true;
11678 if (!int_registers_saved)
11680 /* If saving registers via PUSH, do so now. */
11681 if (!frame.save_regs_using_mov)
11683 ix86_emit_save_regs ();
11684 int_registers_saved = true;
11685 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11688 /* When using red zone we may start register saving before allocating
11689 the stack frame saving one cycle of the prologue. However, avoid
11690 doing this if we have to probe the stack; at least on x86_64 the
11691 stack probe can turn into a call that clobbers a red zone location. */
11692 else if (ix86_using_red_zone ()
11693 && (! TARGET_STACK_PROBE
11694 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11696 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11697 int_registers_saved = true;
11701 if (stack_realign_fp)
11703 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11704 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11706 /* The computation of the size of the re-aligned stack frame means
11707 that we must allocate the size of the register save area before
11708 performing the actual alignment. Otherwise we cannot guarantee
11709 that there's enough storage above the realignment point. */
11710 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11711 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11712 GEN_INT (m->fs.sp_offset
11713 - frame.sse_reg_save_offset),
11714 -1, false);
11716 /* Align the stack. */
11717 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11718 stack_pointer_rtx,
11719 GEN_INT (-align_bytes)));
11721 /* For the purposes of register save area addressing, the stack
11722 pointer is no longer valid. As for the value of sp_offset,
11723 see ix86_compute_frame_layout, which we need to match in order
11724 to pass verification of stack_pointer_offset at the end. */
11725 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11726 m->fs.sp_valid = false;
11729 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11731 if (flag_stack_usage_info)
11733 /* We start to count from ARG_POINTER. */
11734 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11736 /* If it was realigned, take into account the fake frame. */
11737 if (stack_realign_drap)
11739 if (ix86_static_chain_on_stack)
11740 stack_size += UNITS_PER_WORD;
11742 if (!call_used_regs[REGNO (crtl->drap_reg)])
11743 stack_size += UNITS_PER_WORD;
11745 /* This over-estimates by 1 minimal-stack-alignment-unit but
11746 mitigates that by counting in the new return address slot. */
11747 current_function_dynamic_stack_size
11748 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11751 current_function_static_stack_size = stack_size;
11754 /* On SEH target with very large frame size, allocate an area to save
11755 SSE registers (as the very large allocation won't be described). */
11756 if (TARGET_SEH
11757 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11758 && !sse_registers_saved)
11760 HOST_WIDE_INT sse_size =
11761 frame.sse_reg_save_offset - frame.reg_save_offset;
11763 gcc_assert (int_registers_saved);
11765 /* No need to do stack checking as the area will be immediately
11766 written. */
11767 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11768 GEN_INT (-sse_size), -1,
11769 m->fs.cfa_reg == stack_pointer_rtx);
11770 allocate -= sse_size;
11771 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11772 sse_registers_saved = true;
11775 /* The stack has already been decremented by the instruction calling us
11776 so probe if the size is non-negative to preserve the protection area. */
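/* Two strategies follow: with STACK_CHECK_MOVING_SP the allocation and
   the probing are interleaved by ix86_adjust_stack_and_probe; otherwise
   the range beyond the protection area is probed first and the actual
   allocation happens further below.  */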
11777 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11779 /* We expect the registers to be saved when probes are used. */
11780 gcc_assert (int_registers_saved);
11782 if (STACK_CHECK_MOVING_SP)
11784 if (!(crtl->is_leaf && !cfun->calls_alloca
11785 && allocate <= PROBE_INTERVAL))
11787 ix86_adjust_stack_and_probe (allocate);
11788 allocate = 0;
11791 else
11793 HOST_WIDE_INT size = allocate;
11795 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11796 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11798 if (TARGET_STACK_PROBE)
11800 if (crtl->is_leaf && !cfun->calls_alloca)
11802 if (size > PROBE_INTERVAL)
11803 ix86_emit_probe_stack_range (0, size);
11805 else
11806 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11808 else
11810 if (crtl->is_leaf && !cfun->calls_alloca)
11812 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11813 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11814 size - STACK_CHECK_PROTECT);
11816 else
11817 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11822 if (allocate == 0)
11824 else if (!ix86_target_stack_probe ()
11825 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11827 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11828 GEN_INT (-allocate), -1,
11829 m->fs.cfa_reg == stack_pointer_rtx);
11831 else
11833 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11834 rtx r10 = NULL;
11835 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11836 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11837 bool eax_live = ix86_eax_live_at_start_p ();
11838 bool r10_live = false;
11840 if (TARGET_64BIT)
11841 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11843 if (eax_live)
11845 insn = emit_insn (gen_push (eax));
11846 allocate -= UNITS_PER_WORD;
11847 /* Note that SEH directives need to continue tracking the stack
11848 pointer even after the frame pointer has been set up. */
11849 if (sp_is_cfa_reg || TARGET_SEH)
11851 if (sp_is_cfa_reg)
11852 m->fs.cfa_offset += UNITS_PER_WORD;
11853 RTX_FRAME_RELATED_P (insn) = 1;
11854 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11855 gen_rtx_SET (stack_pointer_rtx,
11856 plus_constant (Pmode, stack_pointer_rtx,
11857 -UNITS_PER_WORD)));
11861 if (r10_live)
11863 r10 = gen_rtx_REG (Pmode, R10_REG);
11864 insn = emit_insn (gen_push (r10));
11865 allocate -= UNITS_PER_WORD;
11866 if (sp_is_cfa_reg || TARGET_SEH)
11868 if (sp_is_cfa_reg)
11869 m->fs.cfa_offset += UNITS_PER_WORD;
11870 RTX_FRAME_RELATED_P (insn) = 1;
11871 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11872 gen_rtx_SET (stack_pointer_rtx,
11873 plus_constant (Pmode, stack_pointer_rtx,
11874 -UNITS_PER_WORD)));
11878 emit_move_insn (eax, GEN_INT (allocate));
11879 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11881 /* Use the fact that AX still contains ALLOCATE. */
11882 adjust_stack_insn = (Pmode == DImode
11883 ? gen_pro_epilogue_adjust_stack_di_sub
11884 : gen_pro_epilogue_adjust_stack_si_sub);
11886 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11887 stack_pointer_rtx, eax));
11889 if (sp_is_cfa_reg || TARGET_SEH)
11891 if (sp_is_cfa_reg)
11892 m->fs.cfa_offset += allocate;
11893 RTX_FRAME_RELATED_P (insn) = 1;
11894 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11895 gen_rtx_SET (stack_pointer_rtx,
11896 plus_constant (Pmode, stack_pointer_rtx,
11897 -allocate)));
11899 m->fs.sp_offset += allocate;
11901 /* Use stack_pointer_rtx for relative addressing so that code
11902 works for realigned stack, too. */
11903 if (r10_live && eax_live)
11905 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11906 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11907 gen_frame_mem (word_mode, t));
11908 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11909 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11910 gen_frame_mem (word_mode, t));
11912 else if (eax_live || r10_live)
11914 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11915 emit_move_insn (gen_rtx_REG (word_mode,
11916 (eax_live ? AX_REG : R10_REG)),
11917 gen_frame_mem (word_mode, t));
11920 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11922 /* If we haven't already set up the frame pointer, do so now. */
11923 if (frame_pointer_needed && !m->fs.fp_valid)
11925 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11926 GEN_INT (frame.stack_pointer_offset
11927 - frame.hard_frame_pointer_offset));
11928 insn = emit_insn (insn);
11929 RTX_FRAME_RELATED_P (insn) = 1;
11930 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11932 if (m->fs.cfa_reg == stack_pointer_rtx)
11933 m->fs.cfa_reg = hard_frame_pointer_rtx;
11934 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11935 m->fs.fp_valid = true;
11938 if (!int_registers_saved)
11939 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11940 if (!sse_registers_saved)
11941 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11943 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11944 in the PROLOGUE. */
11945 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11947 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11948 insn = emit_insn (gen_set_got (pic));
11949 RTX_FRAME_RELATED_P (insn) = 1;
11950 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11951 emit_insn (gen_prologue_use (pic));
11952 /* Delete an already emitted SET_GOT if it exists and is allocated to
11953 REAL_PIC_OFFSET_TABLE_REGNUM. */
11954 ix86_elim_entry_set_got (pic);
11957 if (crtl->drap_reg && !crtl->stack_realign_needed)
11959 /* vDRAP is set up, but after reload it turns out stack realignment
11960 isn't necessary; here we emit prologue code to set up DRAP
11961 without the stack realignment adjustment. */
11962 t = choose_baseaddr (0);
11963 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11966 /* Prevent instructions from being scheduled into register save push
11967 sequence when access to the redzone area is done through frame pointer.
11968 The offset between the frame pointer and the stack pointer is calculated
11969 relative to the value of the stack pointer at the end of the function
11970 prologue, and moving instructions that access redzone area via frame
11971 pointer inside push sequence violates this assumption. */
11972 if (frame_pointer_needed && frame.red_zone_size)
11973 emit_insn (gen_memory_blockage ());
11975 /* Emit cld instruction if stringops are used in the function. */
11976 if (TARGET_CLD && ix86_current_function_needs_cld)
11977 emit_insn (gen_cld ());
11979 /* SEH requires that the prologue end within 256 bytes of the start of
11980 the function. Prevent instruction schedules that would extend that.
11981 Further, prevent alloca modifications to the stack pointer from being
11982 combined with prologue modifications. */
11983 if (TARGET_SEH)
11984 emit_insn (gen_prologue_use (stack_pointer_rtx));
11987 /* Emit code to restore REG using a POP insn. */
11989 static void
11990 ix86_emit_restore_reg_using_pop (rtx reg)
11992 struct machine_function *m = cfun->machine;
11993 rtx_insn *insn = emit_insn (gen_pop (reg));
11995 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11996 m->fs.sp_offset -= UNITS_PER_WORD;
11998 if (m->fs.cfa_reg == crtl->drap_reg
11999 && REGNO (reg) == REGNO (crtl->drap_reg))
12001 /* Previously we'd represented the CFA as an expression
12002 like *(%ebp - 8). We've just popped that value from
12003 the stack, which means we need to reset the CFA to
12004 the drap register. This will remain until we restore
12005 the stack pointer. */
12006 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12007 RTX_FRAME_RELATED_P (insn) = 1;
12009 /* This means that the DRAP register is valid for addressing too. */
12010 m->fs.drap_valid = true;
12011 return;
12014 if (m->fs.cfa_reg == stack_pointer_rtx)
12016 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12017 x = gen_rtx_SET (stack_pointer_rtx, x);
12018 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12019 RTX_FRAME_RELATED_P (insn) = 1;
12021 m->fs.cfa_offset -= UNITS_PER_WORD;
12024 /* When the frame pointer is the CFA, and we pop it, we are
12025 swapping back to the stack pointer as the CFA. This happens
12026 for stack frames that don't allocate other data, so we assume
12027 the stack pointer is now pointing at the return address, i.e.
12028 the function entry state, which makes the offset one word. */
12029 if (reg == hard_frame_pointer_rtx)
12031 m->fs.fp_valid = false;
12032 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
12034 m->fs.cfa_reg = stack_pointer_rtx;
12035 m->fs.cfa_offset -= UNITS_PER_WORD;
12037 add_reg_note (insn, REG_CFA_DEF_CFA,
12038 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12039 GEN_INT (m->fs.cfa_offset)));
12040 RTX_FRAME_RELATED_P (insn) = 1;
12045 /* Emit code to restore saved registers using POP insns. */
12047 static void
12048 ix86_emit_restore_regs_using_pop (void)
12050 unsigned int regno;
12052 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12053 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
12054 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
12057 /* Emit code and notes for the LEAVE instruction. */
12059 static void
12060 ix86_emit_leave (void)
12062 struct machine_function *m = cfun->machine;
12063 rtx_insn *insn = emit_insn (ix86_gen_leave ());
12065 ix86_add_queued_cfa_restore_notes (insn);
12067 gcc_assert (m->fs.fp_valid);
12068 m->fs.sp_valid = true;
12069 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
12070 m->fs.fp_valid = false;
12072 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
12074 m->fs.cfa_reg = stack_pointer_rtx;
12075 m->fs.cfa_offset = m->fs.sp_offset;
12077 add_reg_note (insn, REG_CFA_DEF_CFA,
12078 plus_constant (Pmode, stack_pointer_rtx,
12079 m->fs.sp_offset));
12080 RTX_FRAME_RELATED_P (insn) = 1;
12082 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
12083 m->fs.fp_offset);
12086 /* Emit code to restore saved registers using MOV insns.
12087 First register is restored from CFA - CFA_OFFSET. */
12088 static void
12089 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
12090 bool maybe_eh_return)
12092 struct machine_function *m = cfun->machine;
12093 unsigned int regno;
12095 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12096 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12098 rtx reg = gen_rtx_REG (word_mode, regno);
12099 rtx mem;
12100 rtx_insn *insn;
12102 mem = choose_baseaddr (cfa_offset);
12103 mem = gen_frame_mem (word_mode, mem);
12104 insn = emit_move_insn (reg, mem);
12106 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
12108 /* Previously we'd represented the CFA as an expression
12109 like *(%ebp - 8). We've just reloaded that value from
12110 the stack, which means we need to reset the CFA to
12111 the drap register. This will remain until we restore
12112 the stack pointer. */
12113 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12114 RTX_FRAME_RELATED_P (insn) = 1;
12116 /* This means that the DRAP register is valid for addressing. */
12117 m->fs.drap_valid = true;
12119 else
12120 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12122 cfa_offset -= UNITS_PER_WORD;
12126 /* Emit code to restore saved registers using MOV insns.
12127 First register is restored from CFA - CFA_OFFSET. */
12128 static void
12129 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12130 bool maybe_eh_return)
12132 unsigned int regno;
12134 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12135 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12137 rtx reg = gen_rtx_REG (V4SFmode, regno);
12138 rtx mem;
12140 mem = choose_baseaddr (cfa_offset);
12141 mem = gen_rtx_MEM (V4SFmode, mem);
12142 set_mem_align (mem, 128);
12143 emit_move_insn (reg, mem);
12145 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12147 cfa_offset -= 16;
12151 /* Restore function stack, frame, and registers. */
12153 void
12154 ix86_expand_epilogue (int style)
12156 struct machine_function *m = cfun->machine;
12157 struct machine_frame_state frame_state_save = m->fs;
12158 struct ix86_frame frame;
12159 bool restore_regs_via_mov;
12160 bool using_drap;
12162 ix86_finalize_stack_realign_flags ();
12163 ix86_compute_frame_layout (&frame);
12165 m->fs.sp_valid = (!frame_pointer_needed
12166 || (crtl->sp_is_unchanging
12167 && !stack_realign_fp));
12168 gcc_assert (!m->fs.sp_valid
12169 || m->fs.sp_offset == frame.stack_pointer_offset);
12171 /* The FP must be valid if the frame pointer is present. */
12172 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12173 gcc_assert (!m->fs.fp_valid
12174 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12176 /* We must have *some* valid pointer to the stack frame. */
12177 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12179 /* The DRAP is never valid at this point. */
12180 gcc_assert (!m->fs.drap_valid);
12182 /* See the comment about red zone and frame
12183 pointer usage in ix86_expand_prologue. */
12184 if (frame_pointer_needed && frame.red_zone_size)
12185 emit_insn (gen_memory_blockage ());
12187 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12188 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12190 /* Determine the CFA offset of the end of the red-zone. */
12191 m->fs.red_zone_offset = 0;
12192 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12194 /* The red-zone begins below the return address. */
12195 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12197 /* When the register save area is in the aligned portion of
12198 the stack, determine the maximum runtime displacement that
12199 matches up with the aligned frame. */
12200 if (stack_realign_drap)
12201 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12202 + UNITS_PER_WORD);
12205 /* Special care must be taken for the normal return case of a function
12206 using eh_return: the eax and edx registers are marked as saved, but
12207 not restored along this path. Adjust the save location to match. */
12208 if (crtl->calls_eh_return && style != 2)
12209 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12211 /* EH_RETURN requires the use of moves to function properly. */
12212 if (crtl->calls_eh_return)
12213 restore_regs_via_mov = true;
12214 /* SEH requires the use of pops to identify the epilogue. */
12215 else if (TARGET_SEH)
12216 restore_regs_via_mov = false;
12217 /* If we're only restoring one register and sp is not valid then
12218 use a move instruction to restore the register, since it's
12219 less work than reloading sp and popping the register. */
12220 else if (!m->fs.sp_valid && frame.nregs <= 1)
12221 restore_regs_via_mov = true;
12222 else if (TARGET_EPILOGUE_USING_MOVE
12223 && cfun->machine->use_fast_prologue_epilogue
12224 && (frame.nregs > 1
12225 || m->fs.sp_offset != frame.reg_save_offset))
12226 restore_regs_via_mov = true;
12227 else if (frame_pointer_needed
12228 && !frame.nregs
12229 && m->fs.sp_offset != frame.reg_save_offset)
12230 restore_regs_via_mov = true;
12231 else if (frame_pointer_needed
12232 && TARGET_USE_LEAVE
12233 && cfun->machine->use_fast_prologue_epilogue
12234 && frame.nregs == 1)
12235 restore_regs_via_mov = true;
12236 else
12237 restore_regs_via_mov = false;
12239 if (restore_regs_via_mov || frame.nsseregs)
12241 /* Ensure that the entire register save area is addressable via
12242 the stack pointer, if we will restore via sp. */
12243 if (TARGET_64BIT
12244 && m->fs.sp_offset > 0x7fffffff
12245 && !(m->fs.fp_valid || m->fs.drap_valid)
12246 && (frame.nsseregs + frame.nregs) != 0)
12248 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12249 GEN_INT (m->fs.sp_offset
12250 - frame.sse_reg_save_offset),
12251 style,
12252 m->fs.cfa_reg == stack_pointer_rtx);
12256 /* If there are any SSE registers to restore, then we have to do it
12257 via moves, since there's obviously no pop for SSE regs. */
12258 if (frame.nsseregs)
12259 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12260 style == 2);
12262 if (restore_regs_via_mov)
12264 rtx t;
12266 if (frame.nregs)
12267 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12269 /* eh_return epilogues need %ecx added to the stack pointer. */
12270 if (style == 2)
12272 rtx sa = EH_RETURN_STACKADJ_RTX;
12273 rtx_insn *insn;
12275 /* Stack align doesn't work with eh_return. */
12276 gcc_assert (!stack_realign_drap);
12277 /* Neither do regparm nested functions. */
12278 gcc_assert (!ix86_static_chain_on_stack);
12280 if (frame_pointer_needed)
12282 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12283 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12284 emit_insn (gen_rtx_SET (sa, t));
12286 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12287 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12289 /* Note that we use SA as a temporary CFA, as the return
12290 address is at the proper place relative to it. We
12291 pretend this happens at the FP restore insn because
12292 prior to this insn the FP would be stored at the wrong
12293 offset relative to SA, and after this insn we have no
12294 other reasonable register to use for the CFA. We don't
12295 bother resetting the CFA to the SP for the duration of
12296 the return insn. */
12297 add_reg_note (insn, REG_CFA_DEF_CFA,
12298 plus_constant (Pmode, sa, UNITS_PER_WORD));
12299 ix86_add_queued_cfa_restore_notes (insn);
12300 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12301 RTX_FRAME_RELATED_P (insn) = 1;
12303 m->fs.cfa_reg = sa;
12304 m->fs.cfa_offset = UNITS_PER_WORD;
12305 m->fs.fp_valid = false;
12307 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12308 const0_rtx, style, false);
12310 else
12312 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12313 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12314 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
12315 ix86_add_queued_cfa_restore_notes (insn);
12317 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12318 if (m->fs.cfa_offset != UNITS_PER_WORD)
12320 m->fs.cfa_offset = UNITS_PER_WORD;
12321 add_reg_note (insn, REG_CFA_DEF_CFA,
12322 plus_constant (Pmode, stack_pointer_rtx,
12323 UNITS_PER_WORD));
12324 RTX_FRAME_RELATED_P (insn) = 1;
12327 m->fs.sp_offset = UNITS_PER_WORD;
12328 m->fs.sp_valid = true;
12331 else
12333 /* SEH requires that the function end with (1) a stack adjustment
12334 if necessary, (2) a sequence of pops, and (3) a return or
12335 jump instruction. Prevent insns from the function body from
12336 being scheduled into this sequence. */
12337 if (TARGET_SEH)
12339 /* Prevent a catch region from being adjacent to the standard
12340 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12341 several other flags that would be interesting to test are
12342 set up yet. */
12343 if (flag_non_call_exceptions)
12344 emit_insn (gen_nops (const1_rtx));
12345 else
12346 emit_insn (gen_blockage ());
12349 /* First step is to deallocate the stack frame so that we can
12350 pop the registers. Also do it on SEH targets for very large
12351 frames, as the emitted instructions aren't allowed by the ABI in
12352 epilogues. */
12353 if (!m->fs.sp_valid
12354 || (TARGET_SEH
12355 && (m->fs.sp_offset - frame.reg_save_offset
12356 >= SEH_MAX_FRAME_SIZE)))
12358 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12359 GEN_INT (m->fs.fp_offset
12360 - frame.reg_save_offset),
12361 style, false);
12363 else if (m->fs.sp_offset != frame.reg_save_offset)
12365 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12366 GEN_INT (m->fs.sp_offset
12367 - frame.reg_save_offset),
12368 style,
12369 m->fs.cfa_reg == stack_pointer_rtx);
12372 ix86_emit_restore_regs_using_pop ();
12375 /* If we used a frame pointer and haven't already got rid of it,
12376 then do so now. */
12377 if (m->fs.fp_valid)
12379 /* If the stack pointer is valid and pointing at the frame
12380 pointer store address, then we only need a pop. */
12381 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12382 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12383 /* Leave results in shorter dependency chains on CPUs that are
12384 able to grok it fast. */
12385 else if (TARGET_USE_LEAVE
12386 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12387 || !cfun->machine->use_fast_prologue_epilogue)
12388 ix86_emit_leave ();
12389 else
12391 pro_epilogue_adjust_stack (stack_pointer_rtx,
12392 hard_frame_pointer_rtx,
12393 const0_rtx, style, !using_drap);
12394 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12398 if (using_drap)
12400 int param_ptr_offset = UNITS_PER_WORD;
12401 rtx_insn *insn;
12403 gcc_assert (stack_realign_drap);
12405 if (ix86_static_chain_on_stack)
12406 param_ptr_offset += UNITS_PER_WORD;
12407 if (!call_used_regs[REGNO (crtl->drap_reg)])
12408 param_ptr_offset += UNITS_PER_WORD;
12410 insn = emit_insn (gen_rtx_SET
12411 (stack_pointer_rtx,
12412 gen_rtx_PLUS (Pmode,
12413 crtl->drap_reg,
12414 GEN_INT (-param_ptr_offset))));
12415 m->fs.cfa_reg = stack_pointer_rtx;
12416 m->fs.cfa_offset = param_ptr_offset;
12417 m->fs.sp_offset = param_ptr_offset;
12418 m->fs.realigned = false;
12420 add_reg_note (insn, REG_CFA_DEF_CFA,
12421 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12422 GEN_INT (param_ptr_offset)));
12423 RTX_FRAME_RELATED_P (insn) = 1;
12425 if (!call_used_regs[REGNO (crtl->drap_reg)])
12426 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12429 /* At this point the stack pointer must be valid, and we must have
12430 restored all of the registers. We may not have deallocated the
12431 entire stack frame. We've delayed this until now because it may
12432 be possible to merge the local stack deallocation with the
12433 deallocation forced by ix86_static_chain_on_stack. */
12434 gcc_assert (m->fs.sp_valid);
12435 gcc_assert (!m->fs.fp_valid);
12436 gcc_assert (!m->fs.realigned);
12437 if (m->fs.sp_offset != UNITS_PER_WORD)
12439 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12440 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12441 style, true);
12443 else
12444 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12446 /* Sibcall epilogues don't want a return instruction. */
12447 if (style == 0)
12449 m->fs = frame_state_save;
12450 return;
12453 if (crtl->args.pops_args && crtl->args.size)
12455 rtx popc = GEN_INT (crtl->args.pops_args);
12457 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12458 address, do explicit add, and jump indirectly to the caller. */
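/* The sequence emitted below is roughly (illustrative only):
     pop  %ecx        # return address into %ecx
     add  $N, %esp    # N == crtl->args.pops_args
     jmp  *%ecx       # return to the caller  */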
12460 if (crtl->args.pops_args >= 65536)
12462 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12463 rtx_insn *insn;
12465 /* There is no "pascal" calling convention in any 64bit ABI. */
12466 gcc_assert (!TARGET_64BIT);
12468 insn = emit_insn (gen_pop (ecx));
12469 m->fs.cfa_offset -= UNITS_PER_WORD;
12470 m->fs.sp_offset -= UNITS_PER_WORD;
12472 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12473 x = gen_rtx_SET (stack_pointer_rtx, x);
12474 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12475 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
12476 RTX_FRAME_RELATED_P (insn) = 1;
12478 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12479 popc, -1, true);
12480 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12482 else
12483 emit_jump_insn (gen_simple_return_pop_internal (popc));
12485 else
12486 emit_jump_insn (gen_simple_return_internal ());
12488 /* Restore the state back to the state from the prologue,
12489 so that it's correct for the next epilogue. */
12490 m->fs = frame_state_save;
12493 /* Reset from the function's potential modifications. */
12495 static void
12496 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12498 if (pic_offset_table_rtx
12499 && !ix86_use_pseudo_pic_reg ())
12500 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12501 #if TARGET_MACHO
12502 /* Mach-O doesn't support labels at the end of objects, so if
12503 it looks like we might want one, insert a NOP. */
12505 rtx_insn *insn = get_last_insn ();
12506 rtx_insn *deleted_debug_label = NULL;
12507 while (insn
12508 && NOTE_P (insn)
12509 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12511 /* Don't insert a nop if we only find NOTE_INSN_DELETED_DEBUG_LABEL
12512 notes; instead set their CODE_LABEL_NUMBER to -1,
12513 otherwise there would be code generation differences
12514 between -g and -g0. */
12515 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12516 deleted_debug_label = insn;
12517 insn = PREV_INSN (insn);
12519 if (insn
12520 && (LABEL_P (insn)
12521 || (NOTE_P (insn)
12522 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12523 fputs ("\tnop\n", file);
12524 else if (deleted_debug_label)
12525 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12526 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12527 CODE_LABEL_NUMBER (insn) = -1;
12529 #endif
12533 /* Return a scratch register to use in the split stack prologue. The
12534 split stack prologue is used for -fsplit-stack. It consists of the first
12535 instructions in the function, even before the regular prologue.
12536 The scratch register can be any caller-saved register which is not
12537 used for parameters or for the static chain. */
12539 static unsigned int
12540 split_stack_prologue_scratch_regno (void)
12542 if (TARGET_64BIT)
12543 return R11_REG;
12544 else
12546 bool is_fastcall, is_thiscall;
12547 int regparm;
12549 is_fastcall = (lookup_attribute ("fastcall",
12550 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12551 != NULL);
12552 is_thiscall = (lookup_attribute ("thiscall",
12553 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12554 != NULL);
12555 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12557 if (is_fastcall)
12559 if (DECL_STATIC_CHAIN (cfun->decl))
12561 sorry ("-fsplit-stack does not support fastcall with "
12562 "nested function");
12563 return INVALID_REGNUM;
12565 return AX_REG;
12567 else if (is_thiscall)
12569 if (!DECL_STATIC_CHAIN (cfun->decl))
12570 return DX_REG;
12571 return AX_REG;
12573 else if (regparm < 3)
12575 if (!DECL_STATIC_CHAIN (cfun->decl))
12576 return CX_REG;
12577 else
12579 if (regparm >= 2)
12581 sorry ("-fsplit-stack does not support 2 register "
12582 "parameters for a nested function");
12583 return INVALID_REGNUM;
12585 return DX_REG;
12588 else
12590 /* FIXME: We could make this work by pushing a register
12591 around the addition and comparison. */
12592 sorry ("-fsplit-stack does not support 3 register parameters");
12593 return INVALID_REGNUM;
12598 /* A SYMBOL_REF for the function which allocates new stack space for
12599 -fsplit-stack. */
12601 static GTY(()) rtx split_stack_fn;
12603 /* A SYMBOL_REF for the more stack function when using the large
12604 model. */
12606 static GTY(()) rtx split_stack_fn_large;
12608 /* Handle -fsplit-stack. These are the first instructions in the
12609 function, even before the regular prologue. */
12611 void
12612 ix86_expand_split_stack_prologue (void)
12614 struct ix86_frame frame;
12615 HOST_WIDE_INT allocate;
12616 unsigned HOST_WIDE_INT args_size;
12617 rtx_code_label *label;
12618 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12619 rtx scratch_reg = NULL_RTX;
12620 rtx_code_label *varargs_label = NULL;
12621 rtx fn;
12623 gcc_assert (flag_split_stack && reload_completed);
12625 ix86_finalize_stack_realign_flags ();
12626 ix86_compute_frame_layout (&frame);
12627 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12629 /* This is the label we will branch to if we have enough stack
12630 space. We expect the basic block reordering pass to reverse this
12631 branch if optimizing, so that we branch in the unlikely case. */
12632 label = gen_label_rtx ();
12634 /* We need to compare the stack pointer minus the frame size with
12635 the stack boundary in the TCB. The stack boundary always gives
12636 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12637 can compare directly. Otherwise we need to do an addition. */
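/* On 64-bit targets the check below becomes, roughly (illustrative only;
   the exact TCB slot is target-dependent):
     lea  -ALLOCATE(%rsp), %r11   # only when ALLOCATE is large
     cmp  %fs:<stack_guard>, %r11
     jae  .Lenough_stack  */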
12639 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12640 UNSPEC_STACK_CHECK);
12641 limit = gen_rtx_CONST (Pmode, limit);
12642 limit = gen_rtx_MEM (Pmode, limit);
12643 if (allocate < SPLIT_STACK_AVAILABLE)
12644 current = stack_pointer_rtx;
12645 else
12647 unsigned int scratch_regno;
12648 rtx offset;
12650 /* We need a scratch register to hold the stack pointer minus
12651 the required frame size. Since this is the very start of the
12652 function, the scratch register can be any caller-saved
12653 register which is not used for parameters. */
12654 offset = GEN_INT (- allocate);
12655 scratch_regno = split_stack_prologue_scratch_regno ();
12656 if (scratch_regno == INVALID_REGNUM)
12657 return;
12658 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12659 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12661 /* We don't use ix86_gen_add3 in this case because it will
12662 want to split to lea, but when not optimizing the insn
12663 will not be split after this point. */
12664 emit_insn (gen_rtx_SET (scratch_reg,
12665 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12666 offset)));
12668 else
12670 emit_move_insn (scratch_reg, offset);
12671 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12672 stack_pointer_rtx));
12674 current = scratch_reg;
12677 ix86_expand_branch (GEU, current, limit, label);
12678 jump_insn = get_last_insn ();
12679 JUMP_LABEL (jump_insn) = label;
12681 /* Mark the jump as very likely to be taken. */
12682 add_int_reg_note (jump_insn, REG_BR_PROB,
12683 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12685 if (split_stack_fn == NULL_RTX)
12687 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12688 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12690 fn = split_stack_fn;
12692 /* Get more stack space. We pass in the desired stack space and the
12693 size of the arguments to copy to the new stack. In 32-bit mode
12694 we push the parameters; __morestack will return on a new stack
12695 anyhow. In 64-bit mode we pass the parameters in r10 and
12696 r11. */
12697 allocate_rtx = GEN_INT (allocate);
12698 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12699 call_fusage = NULL_RTX;
12700 if (TARGET_64BIT)
12702 rtx reg10, reg11;
12704 reg10 = gen_rtx_REG (Pmode, R10_REG);
12705 reg11 = gen_rtx_REG (Pmode, R11_REG);
12707 /* If this function uses a static chain, it will be in %r10.
12708 Preserve it across the call to __morestack. */
12709 if (DECL_STATIC_CHAIN (cfun->decl))
12711 rtx rax;
12713 rax = gen_rtx_REG (word_mode, AX_REG);
12714 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12715 use_reg (&call_fusage, rax);
12718 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12719 && !TARGET_PECOFF)
12721 HOST_WIDE_INT argval;
12723 gcc_assert (Pmode == DImode);
12724 /* When using the large model we need to load the address
12725 into a register, and we've run out of registers. So we
12726 switch to a different calling convention, and we call a
12727 different function: __morestack_large. We pass the
12728 argument size in the upper 32 bits of r10 and pass the
12729 frame size in the lower 32 bits. */
12730 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12731 gcc_assert ((args_size & 0xffffffff) == args_size);
12733 if (split_stack_fn_large == NULL_RTX)
12735 split_stack_fn_large =
12736 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12737 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12739 if (ix86_cmodel == CM_LARGE_PIC)
12741 rtx_code_label *label;
12742 rtx x;
12744 label = gen_label_rtx ();
12745 emit_label (label);
12746 LABEL_PRESERVE_P (label) = 1;
12747 emit_insn (gen_set_rip_rex64 (reg10, label));
12748 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12749 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12750 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12751 UNSPEC_GOT);
12752 x = gen_rtx_CONST (Pmode, x);
12753 emit_move_insn (reg11, x);
12754 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12755 x = gen_const_mem (Pmode, x);
12756 emit_move_insn (reg11, x);
12758 else
12759 emit_move_insn (reg11, split_stack_fn_large);
12761 fn = reg11;
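/* Illustrative arithmetic for the packing below: args_size == 0x18 and
   allocate == 0x100 give argval == 0x0000001800000100, i.e. the argument
   size in the upper 32 bits and the frame size in the lower 32 bits.  */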
12763 argval = ((args_size << 16) << 16) + allocate;
12764 emit_move_insn (reg10, GEN_INT (argval));
12766 else
12768 emit_move_insn (reg10, allocate_rtx);
12769 emit_move_insn (reg11, GEN_INT (args_size));
12770 use_reg (&call_fusage, reg11);
12773 use_reg (&call_fusage, reg10);
12775 else
12777 emit_insn (gen_push (GEN_INT (args_size)));
12778 emit_insn (gen_push (allocate_rtx));
12780 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12781 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12782 NULL_RTX, false);
12783 add_function_usage_to (call_insn, call_fusage);
12785 /* In order to make call/return prediction work right, we now need
12786 to execute a return instruction. See
12787 libgcc/config/i386/morestack.S for the details on how this works.
12789 For flow purposes gcc must not see this as a return
12790 instruction--we need control flow to continue at the subsequent
12791 label. Therefore, we use an unspec. */
12792 gcc_assert (crtl->args.pops_args < 65536);
12793 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12795 /* If we are in 64-bit mode and this function uses a static chain,
12796 we saved %r10 in %rax before calling __morestack. */
12797 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12798 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12799 gen_rtx_REG (word_mode, AX_REG));
12801 /* If this function calls va_start, we need to store a pointer to
12802 the arguments on the old stack, because they may not have been
12803 all copied to the new stack. At this point the old stack can be
12804 found at the frame pointer value used by __morestack, because
12805 __morestack has set that up before calling back to us. Here we
12806 store that pointer in a scratch register, and in
12807 ix86_expand_prologue we store the scratch register in a stack
12808 slot. */
12809 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12811 unsigned int scratch_regno;
12812 rtx frame_reg;
12813 int words;
12815 scratch_regno = split_stack_prologue_scratch_regno ();
12816 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12817 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12819 /* 64-bit:
12820 fp -> old fp value
12821 return address within this function
12822 return address of caller of this function
12823 stack arguments
12824 So we add three words to get to the stack arguments.
12826 32-bit:
12827 fp -> old fp value
12828 return address within this function
12829 first argument to __morestack
12830 second argument to __morestack
12831 return address of caller of this function
12832 stack arguments
12833 So we add five words to get to the stack arguments. */
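/* Illustrative arithmetic: with UNITS_PER_WORD == 8 the stack arguments
   therefore start at fp + 24 in 64-bit mode, and with UNITS_PER_WORD == 4
   at fp + 20 in 32-bit mode.  */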
12835 words = TARGET_64BIT ? 3 : 5;
12836 emit_insn (gen_rtx_SET (scratch_reg,
12837 gen_rtx_PLUS (Pmode, frame_reg,
12838 GEN_INT (words * UNITS_PER_WORD))));
12840 varargs_label = gen_label_rtx ();
12841 emit_jump_insn (gen_jump (varargs_label));
12842 JUMP_LABEL (get_last_insn ()) = varargs_label;
12844 emit_barrier ();
12847 emit_label (label);
12848 LABEL_NUSES (label) = 1;
12850 /* If this function calls va_start, we now have to set the scratch
12851 register for the case where we do not call __morestack. In this
12852 case we need to set it based on the stack pointer. */
12853 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12855 emit_insn (gen_rtx_SET (scratch_reg,
12856 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12857 GEN_INT (UNITS_PER_WORD))));
12859 emit_label (varargs_label);
12860 LABEL_NUSES (varargs_label) = 1;
12864 /* We may have to tell the dataflow pass that the split stack prologue
12865 is initializing a scratch register. */
12867 static void
12868 ix86_live_on_entry (bitmap regs)
12870 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12872 gcc_assert (flag_split_stack);
12873 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12877 /* Extract the parts of an RTL expression that is a valid memory address
12878 for an instruction. Return 0 if the structure of the address is
12879 grossly off. Return -1 if the address contains ASHIFT, so it is not
12880 strictly valid, but still used for computing the length of an lea instruction. */
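/* For example (a sketch, not tied to any particular caller): the address
     (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
                       (reg:SI %ebx))
              (const_int 16))
   decomposes into base %ebx, index %eax, scale 4 and disp 16, i.e. the
   operand of "movl 16(%ebx,%eax,4), ...".  */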
12882 int
12883 ix86_decompose_address (rtx addr, struct ix86_address *out)
12885 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12886 rtx base_reg, index_reg;
12887 HOST_WIDE_INT scale = 1;
12888 rtx scale_rtx = NULL_RTX;
12889 rtx tmp;
12890 int retval = 1;
12891 enum ix86_address_seg seg = SEG_DEFAULT;
12893 /* Allow zero-extended SImode addresses;
12894 they will be emitted with the addr32 prefix. */
12895 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12897 if (GET_CODE (addr) == ZERO_EXTEND
12898 && GET_MODE (XEXP (addr, 0)) == SImode)
12900 addr = XEXP (addr, 0);
12901 if (CONST_INT_P (addr))
12902 return 0;
12904 else if (GET_CODE (addr) == AND
12905 && const_32bit_mask (XEXP (addr, 1), DImode))
12907 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12908 if (addr == NULL_RTX)
12909 return 0;
12911 if (CONST_INT_P (addr))
12912 return 0;
12916 /* Allow SImode subregs of DImode addresses;
12917 they will be emitted with the addr32 prefix. */
12918 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12920 if (GET_CODE (addr) == SUBREG
12921 && GET_MODE (SUBREG_REG (addr)) == DImode)
12923 addr = SUBREG_REG (addr);
12924 if (CONST_INT_P (addr))
12925 return 0;
12929 if (REG_P (addr))
12930 base = addr;
12931 else if (GET_CODE (addr) == SUBREG)
12933 if (REG_P (SUBREG_REG (addr)))
12934 base = addr;
12935 else
12936 return 0;
12938 else if (GET_CODE (addr) == PLUS)
12940 rtx addends[4], op;
12941 int n = 0, i;
12943 op = addr;
12946 if (n >= 4)
12947 return 0;
12948 addends[n++] = XEXP (op, 1);
12949 op = XEXP (op, 0);
12951 while (GET_CODE (op) == PLUS);
12952 if (n >= 4)
12953 return 0;
12954 addends[n] = op;
12956 for (i = n; i >= 0; --i)
12958 op = addends[i];
12959 switch (GET_CODE (op))
12961 case MULT:
12962 if (index)
12963 return 0;
12964 index = XEXP (op, 0);
12965 scale_rtx = XEXP (op, 1);
12966 break;
12968 case ASHIFT:
12969 if (index)
12970 return 0;
12971 index = XEXP (op, 0);
12972 tmp = XEXP (op, 1);
12973 if (!CONST_INT_P (tmp))
12974 return 0;
12975 scale = INTVAL (tmp);
12976 if ((unsigned HOST_WIDE_INT) scale > 3)
12977 return 0;
12978 scale = 1 << scale;
12979 break;
12981 case ZERO_EXTEND:
12982 op = XEXP (op, 0);
12983 if (GET_CODE (op) != UNSPEC)
12984 return 0;
12985 /* FALLTHRU */
12987 case UNSPEC:
12988 if (XINT (op, 1) == UNSPEC_TP
12989 && TARGET_TLS_DIRECT_SEG_REFS
12990 && seg == SEG_DEFAULT)
12991 seg = DEFAULT_TLS_SEG_REG;
12992 else
12993 return 0;
12994 break;
12996 case SUBREG:
12997 if (!REG_P (SUBREG_REG (op)))
12998 return 0;
12999 /* FALLTHRU */
13001 case REG:
13002 if (!base)
13003 base = op;
13004 else if (!index)
13005 index = op;
13006 else
13007 return 0;
13008 break;
13010 case CONST:
13011 case CONST_INT:
13012 case SYMBOL_REF:
13013 case LABEL_REF:
13014 if (disp)
13015 return 0;
13016 disp = op;
13017 break;
13019 default:
13020 return 0;
13024 else if (GET_CODE (addr) == MULT)
13026 index = XEXP (addr, 0); /* index*scale */
13027 scale_rtx = XEXP (addr, 1);
13029 else if (GET_CODE (addr) == ASHIFT)
13031 /* We're called for lea too, which implements ashift on occasion. */
13032 index = XEXP (addr, 0);
13033 tmp = XEXP (addr, 1);
13034 if (!CONST_INT_P (tmp))
13035 return 0;
13036 scale = INTVAL (tmp);
13037 if ((unsigned HOST_WIDE_INT) scale > 3)
13038 return 0;
13039 scale = 1 << scale;
13040 retval = -1;
13042 else
13043 disp = addr; /* displacement */
13045 if (index)
13047 if (REG_P (index))
13049 else if (GET_CODE (index) == SUBREG
13050 && REG_P (SUBREG_REG (index)))
13052 else
13053 return 0;
13056 /* Extract the integral value of scale. */
13057 if (scale_rtx)
13059 if (!CONST_INT_P (scale_rtx))
13060 return 0;
13061 scale = INTVAL (scale_rtx);
13064 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
13065 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
13067 /* Avoid useless 0 displacement. */
13068 if (disp == const0_rtx && (base || index))
13069 disp = NULL_RTX;
13071 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
13072 if (base_reg && index_reg && scale == 1
13073 && (index_reg == arg_pointer_rtx
13074 || index_reg == frame_pointer_rtx
13075 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
13077 std::swap (base, index);
13078 std::swap (base_reg, index_reg);
13081 /* Special case: %ebp cannot be encoded as a base without a displacement.
13082 Similarly %r13. */
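/* (In the ModRM/SIB encoding, mod == 00 with base == 101 means "disp32,
   no base", so a zero disp8 must be emitted explicitly when %ebp or %r13
   is used as the base register.)  */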
13083 if (!disp
13084 && base_reg
13085 && (base_reg == hard_frame_pointer_rtx
13086 || base_reg == frame_pointer_rtx
13087 || base_reg == arg_pointer_rtx
13088 || (REG_P (base_reg)
13089 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
13090 || REGNO (base_reg) == R13_REG))))
13091 disp = const0_rtx;
13093 /* Special case: on K6, [%esi] makes the instruction vector decoded.
13094 Avoid this by transforming to [%esi+0].
13095 Reload calls address legitimization without cfun defined, so we need
13096 to test cfun for being non-NULL. */
13097 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
13098 && base_reg && !index_reg && !disp
13099 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
13100 disp = const0_rtx;
13102 /* Special case: encode reg+reg instead of reg*2. */
13103 if (!base && index && scale == 2)
13104 base = index, base_reg = index_reg, scale = 1;
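/* (An index with no base needs a 32-bit displacement in the SIB encoding,
   so "(%eax,%eax)" is shorter than "(,%eax,2)".)  */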
13106 /* Special case: scaling cannot be encoded without base or displacement. */
13107 if (!base && !disp && index && scale != 1)
13108 disp = const0_rtx;
13110 out->base = base;
13111 out->index = index;
13112 out->disp = disp;
13113 out->scale = scale;
13114 out->seg = seg;
13116 return retval;
13119 /* Return cost of the memory address x.
13120 For i386, it is better to use a complex address than let gcc copy
13121 the address into a reg and make a new pseudo. But not if the address
13122 requires two regs - that would mean more pseudos with longer
13123 lifetimes. */
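/* For example (illustrative only, ignoring the pic_offset_table_rtx and
   K6 exceptions below): an address built from two pseudo registers, such
   as (plus (reg) (reg)), costs 3, a single-pseudo address costs 2, and a
   hard register plus a constant costs 1.  */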
13124 static int
13125 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13127 struct ix86_address parts;
13128 int cost = 1;
13129 int ok = ix86_decompose_address (x, &parts);
13131 gcc_assert (ok);
13133 if (parts.base && GET_CODE (parts.base) == SUBREG)
13134 parts.base = SUBREG_REG (parts.base);
13135 if (parts.index && GET_CODE (parts.index) == SUBREG)
13136 parts.index = SUBREG_REG (parts.index);
13138 /* Attempt to minimize number of registers in the address by increasing
13139 address cost for each used register. We don't increase address cost
13140 for "pic_offset_table_rtx". When a memop with "pic_offset_table_rtx"
13141 is not invariant itself, it most likely means that the base or index is not
13142 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
13143 which is not profitable for x86. */
13144 if (parts.base
13145 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13146 && (current_pass->type == GIMPLE_PASS
13147 || !pic_offset_table_rtx
13148 || !REG_P (parts.base)
13149 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13150 cost++;
13152 if (parts.index
13153 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13154 && (current_pass->type == GIMPLE_PASS
13155 || !pic_offset_table_rtx
13156 || !REG_P (parts.index)
13157 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13158 cost++;
13160 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
13161 since its predecode logic can't detect the length of instructions
13162 and it degenerates to vector decoding. Increase the cost of such
13163 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
13164 to split such addresses or even refuse such addresses at all.
13166 Following addressing modes are affected:
13167 [base+scale*index]
13168 [scale*index+disp]
13169 [base+index]
13171 The first and last cases may be avoidable by explicitly coding the zero into
13172 the memory address, but I don't have an AMD-K6 machine handy to check this
13173 theory. */
13175 if (TARGET_K6
13176 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13177 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13178 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13179 cost += 10;
13181 return cost;
13184 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13185 this is used to form addresses to local data when -fPIC is in
13186 use. */
13188 static bool
13189 darwin_local_data_pic (rtx disp)
13191 return (GET_CODE (disp) == UNSPEC
13192 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13195 /* Determine if a given RTX is a valid constant. We already know this
13196 satisfies CONSTANT_P. */
13198 static bool
13199 ix86_legitimate_constant_p (machine_mode, rtx x)
13201 /* Pointer bounds constants are not valid. */
13202 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13203 return false;
13205 switch (GET_CODE (x))
13207 case CONST:
13208 x = XEXP (x, 0);
13210 if (GET_CODE (x) == PLUS)
13212 if (!CONST_INT_P (XEXP (x, 1)))
13213 return false;
13214 x = XEXP (x, 0);
13217 if (TARGET_MACHO && darwin_local_data_pic (x))
13218 return true;
13220 /* Only some unspecs are valid as "constants". */
13221 if (GET_CODE (x) == UNSPEC)
13222 switch (XINT (x, 1))
13224 case UNSPEC_GOT:
13225 case UNSPEC_GOTOFF:
13226 case UNSPEC_PLTOFF:
13227 return TARGET_64BIT;
13228 case UNSPEC_TPOFF:
13229 case UNSPEC_NTPOFF:
13230 x = XVECEXP (x, 0, 0);
13231 return (GET_CODE (x) == SYMBOL_REF
13232 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13233 case UNSPEC_DTPOFF:
13234 x = XVECEXP (x, 0, 0);
13235 return (GET_CODE (x) == SYMBOL_REF
13236 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13237 default:
13238 return false;
13241 /* We must have drilled down to a symbol. */
13242 if (GET_CODE (x) == LABEL_REF)
13243 return true;
13244 if (GET_CODE (x) != SYMBOL_REF)
13245 return false;
13246 /* FALLTHRU */
13248 case SYMBOL_REF:
13249 /* TLS symbols are never valid. */
13250 if (SYMBOL_REF_TLS_MODEL (x))
13251 return false;
13253 /* DLLIMPORT symbols are never valid. */
13254 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13255 && SYMBOL_REF_DLLIMPORT_P (x))
13256 return false;
13258 #if TARGET_MACHO
13259 /* mdynamic-no-pic */
13260 if (MACHO_DYNAMIC_NO_PIC_P)
13261 return machopic_symbol_defined_p (x);
13262 #endif
13263 break;
13265 case CONST_WIDE_INT:
13266 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13267 return false;
13268 break;
13270 case CONST_VECTOR:
13271 if (!standard_sse_constant_p (x))
13272 return false;
13274 default:
13275 break;
13278 /* Otherwise we handle everything else in the move patterns. */
13279 return true;
13282 /* Determine if it's legal to put X into the constant pool. This
13283 is not possible for the address of thread-local symbols, which
13284 is checked above. */
13286 static bool
13287 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13289 /* We can always put integral constants and vectors in memory. */
13290 switch (GET_CODE (x))
13292 case CONST_INT:
13293 case CONST_WIDE_INT:
13294 case CONST_DOUBLE:
13295 case CONST_VECTOR:
13296 return false;
13298 default:
13299 break;
13301 return !ix86_legitimate_constant_p (mode, x);
13304 /* Nonzero if the symbol is marked as dllimport or as a stub-variable,
13305 otherwise zero. */
13307 static bool
13308 is_imported_p (rtx x)
13310 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13311 || GET_CODE (x) != SYMBOL_REF)
13312 return false;
13314 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13318 /* Nonzero if the constant value X is a legitimate general operand
13319 when generating PIC code. It is given that flag_pic is on and
13320 that X satisfies CONSTANT_P. */
13322 bool
13323 legitimate_pic_operand_p (rtx x)
13325 rtx inner;
13327 switch (GET_CODE (x))
13329 case CONST:
13330 inner = XEXP (x, 0);
13331 if (GET_CODE (inner) == PLUS
13332 && CONST_INT_P (XEXP (inner, 1)))
13333 inner = XEXP (inner, 0);
13335 /* Only some unspecs are valid as "constants". */
13336 if (GET_CODE (inner) == UNSPEC)
13337 switch (XINT (inner, 1))
13339 case UNSPEC_GOT:
13340 case UNSPEC_GOTOFF:
13341 case UNSPEC_PLTOFF:
13342 return TARGET_64BIT;
13343 case UNSPEC_TPOFF:
13344 x = XVECEXP (inner, 0, 0);
13345 return (GET_CODE (x) == SYMBOL_REF
13346 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13347 case UNSPEC_MACHOPIC_OFFSET:
13348 return legitimate_pic_address_disp_p (x);
13349 default:
13350 return false;
13352 /* FALLTHRU */
13354 case SYMBOL_REF:
13355 case LABEL_REF:
13356 return legitimate_pic_address_disp_p (x);
13358 default:
13359 return true;
13363 /* Determine if a given CONST RTX is a valid memory displacement
13364 in PIC mode. */
13366 bool
13367 legitimate_pic_address_disp_p (rtx disp)
13369 bool saw_plus;
13371 /* In 64bit mode we can allow direct addresses of symbols and labels
13372 when they are not dynamic symbols. */
13373 if (TARGET_64BIT)
13375 rtx op0 = disp, op1;
13377 switch (GET_CODE (disp))
13379 case LABEL_REF:
13380 return true;
13382 case CONST:
13383 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13384 break;
13385 op0 = XEXP (XEXP (disp, 0), 0);
13386 op1 = XEXP (XEXP (disp, 0), 1);
13387 if (!CONST_INT_P (op1)
13388 || INTVAL (op1) >= 16*1024*1024
13389 || INTVAL (op1) < -16*1024*1024)
13390 break;
13391 if (GET_CODE (op0) == LABEL_REF)
13392 return true;
13393 if (GET_CODE (op0) == CONST
13394 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13395 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13396 return true;
13397 if (GET_CODE (op0) == UNSPEC
13398 && XINT (op0, 1) == UNSPEC_PCREL)
13399 return true;
13400 if (GET_CODE (op0) != SYMBOL_REF)
13401 break;
13402 /* FALLTHRU */
13404 case SYMBOL_REF:
13405 /* TLS references should always be enclosed in UNSPEC.
13406 The dllimported symbol always needs to be resolved. */
13407 if (SYMBOL_REF_TLS_MODEL (op0)
13408 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13409 return false;
13411 if (TARGET_PECOFF)
13413 if (is_imported_p (op0))
13414 return true;
13416 if (SYMBOL_REF_FAR_ADDR_P (op0)
13417 || !SYMBOL_REF_LOCAL_P (op0))
13418 break;
13420 /* Function symbols need to be resolved only for
13421 the large model.
13422 For the small model we don't need to resolve anything
13423 here. */
13424 if ((ix86_cmodel != CM_LARGE_PIC
13425 && SYMBOL_REF_FUNCTION_P (op0))
13426 || ix86_cmodel == CM_SMALL_PIC)
13427 return true;
13428 /* Non-external symbols don't need to be resolved for
13429 the large and medium models. */
13430 if ((ix86_cmodel == CM_LARGE_PIC
13431 || ix86_cmodel == CM_MEDIUM_PIC)
13432 && !SYMBOL_REF_EXTERNAL_P (op0))
13433 return true;
13435 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13436 && (SYMBOL_REF_LOCAL_P (op0)
13437 || (HAVE_LD_PIE_COPYRELOC
13438 && flag_pie
13439 && !SYMBOL_REF_WEAK (op0)
13440 && !SYMBOL_REF_FUNCTION_P (op0)))
13441 && ix86_cmodel != CM_LARGE_PIC)
13442 return true;
13443 break;
13445 default:
13446 break;
13449 if (GET_CODE (disp) != CONST)
13450 return false;
13451 disp = XEXP (disp, 0);
13453 if (TARGET_64BIT)
13455 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
13456 of GOT tables. We should not need these anyway. */
13457 if (GET_CODE (disp) != UNSPEC
13458 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13459 && XINT (disp, 1) != UNSPEC_GOTOFF
13460 && XINT (disp, 1) != UNSPEC_PCREL
13461 && XINT (disp, 1) != UNSPEC_PLTOFF))
13462 return false;
13464 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13465 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13466 return false;
13467 return true;
13470 saw_plus = false;
13471 if (GET_CODE (disp) == PLUS)
13473 if (!CONST_INT_P (XEXP (disp, 1)))
13474 return false;
13475 disp = XEXP (disp, 0);
13476 saw_plus = true;
13479 if (TARGET_MACHO && darwin_local_data_pic (disp))
13480 return true;
13482 if (GET_CODE (disp) != UNSPEC)
13483 return false;
13485 switch (XINT (disp, 1))
13487 case UNSPEC_GOT:
13488 if (saw_plus)
13489 return false;
13490 /* We need to check for both symbols and labels because VxWorks loads
13491 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13492 details. */
13493 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13494 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13495 case UNSPEC_GOTOFF:
13496 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13497 While the ABI also specifies a 32bit relocation, we don't produce it in
13498 the small PIC model at all. */
13499 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13500 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13501 && !TARGET_64BIT)
13502 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13503 return false;
13504 case UNSPEC_GOTTPOFF:
13505 case UNSPEC_GOTNTPOFF:
13506 case UNSPEC_INDNTPOFF:
13507 if (saw_plus)
13508 return false;
13509 disp = XVECEXP (disp, 0, 0);
13510 return (GET_CODE (disp) == SYMBOL_REF
13511 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13512 case UNSPEC_NTPOFF:
13513 disp = XVECEXP (disp, 0, 0);
13514 return (GET_CODE (disp) == SYMBOL_REF
13515 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13516 case UNSPEC_DTPOFF:
13517 disp = XVECEXP (disp, 0, 0);
13518 return (GET_CODE (disp) == SYMBOL_REF
13519 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13522 return false;
13525 /* Determine if op is suitable RTX for an address register.
13526 Return naked register if a register or a register subreg is
13527 found, otherwise return NULL_RTX. */
13529 static rtx
13530 ix86_validate_address_register (rtx op)
13532 machine_mode mode = GET_MODE (op);
13534 /* Only SImode or DImode registers can form the address. */
13535 if (mode != SImode && mode != DImode)
13536 return NULL_RTX;
13538 if (REG_P (op))
13539 return op;
13540 else if (GET_CODE (op) == SUBREG)
13542 rtx reg = SUBREG_REG (op);
13544 if (!REG_P (reg))
13545 return NULL_RTX;
13547 mode = GET_MODE (reg);
13549 /* Don't allow SUBREGs that span more than a word. It can
13550 lead to spill failures when the register is one word out
13551 of a two word structure. */
13552 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13553 return NULL_RTX;
13555 /* Allow only SUBREGs of non-eliminable hard registers. */
13556 if (register_no_elim_operand (reg, mode))
13557 return reg;
13560 /* Op is not a register. */
13561 return NULL_RTX;
13564 /* Recognizes RTL expressions that are valid memory addresses for an
13565 instruction. The MODE argument is the machine mode for the MEM
13566 expression that wants to use this address.
13568 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13569 convert common non-canonical forms to canonical form so that they will
13570 be recognized. */
13572 static bool
13573 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13575 struct ix86_address parts;
13576 rtx base, index, disp;
13577 HOST_WIDE_INT scale;
13578 enum ix86_address_seg seg;
13580 if (ix86_decompose_address (addr, &parts) <= 0)
13581 /* Decomposition failed. */
13582 return false;
13584 base = parts.base;
13585 index = parts.index;
13586 disp = parts.disp;
13587 scale = parts.scale;
13588 seg = parts.seg;
13590 /* Validate base register. */
13591 if (base)
13593 rtx reg = ix86_validate_address_register (base);
13595 if (reg == NULL_RTX)
13596 return false;
13598 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13599 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13600 /* Base is not valid. */
13601 return false;
13604 /* Validate index register. */
13605 if (index)
13607 rtx reg = ix86_validate_address_register (index);
13609 if (reg == NULL_RTX)
13610 return false;
13612 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13613 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13614 /* Index is not valid. */
13615 return false;
13618 /* Index and base should have the same mode. */
13619 if (base && index
13620 && GET_MODE (base) != GET_MODE (index))
13621 return false;
13623 /* Address override works only on the (%reg) part of %fs:(%reg). */
13624 if (seg != SEG_DEFAULT
13625 && ((base && GET_MODE (base) != word_mode)
13626 || (index && GET_MODE (index) != word_mode)))
13627 return false;
13629 /* Validate scale factor. */
13630 if (scale != 1)
13632 if (!index)
13633 /* Scale without index. */
13634 return false;
13636 if (scale != 2 && scale != 4 && scale != 8)
13637 /* Scale is not a valid multiplier. */
13638 return false;
13641 /* Validate displacement. */
13642 if (disp)
13644 if (GET_CODE (disp) == CONST
13645 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13646 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13647 switch (XINT (XEXP (disp, 0), 1))
13649 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13650 used. While the ABI also specifies 32bit relocations, we don't produce
13651 them at all and use IP-relative addressing instead. */
13652 case UNSPEC_GOT:
13653 case UNSPEC_GOTOFF:
13654 gcc_assert (flag_pic);
13655 if (!TARGET_64BIT)
13656 goto is_legitimate_pic;
13658 /* 64bit address unspec. */
13659 return false;
13661 case UNSPEC_GOTPCREL:
13662 case UNSPEC_PCREL:
13663 gcc_assert (flag_pic);
13664 goto is_legitimate_pic;
13666 case UNSPEC_GOTTPOFF:
13667 case UNSPEC_GOTNTPOFF:
13668 case UNSPEC_INDNTPOFF:
13669 case UNSPEC_NTPOFF:
13670 case UNSPEC_DTPOFF:
13671 break;
13673 case UNSPEC_STACK_CHECK:
13674 gcc_assert (flag_split_stack);
13675 break;
13677 default:
13678 /* Invalid address unspec. */
13679 return false;
13682 else if (SYMBOLIC_CONST (disp)
13683 && (flag_pic
13684 || (TARGET_MACHO
13685 #if TARGET_MACHO
13686 && MACHOPIC_INDIRECT
13687 && !machopic_operand_p (disp)
13688 #endif
13692 is_legitimate_pic:
13693 if (TARGET_64BIT && (index || base))
13695 /* foo@dtpoff(%rX) is ok. */
13696 if (GET_CODE (disp) != CONST
13697 || GET_CODE (XEXP (disp, 0)) != PLUS
13698 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13699 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13700 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13701 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13702 /* Non-constant pic memory reference. */
13703 return false;
13705 else if ((!TARGET_MACHO || flag_pic)
13706 && ! legitimate_pic_address_disp_p (disp))
13707 /* Displacement is an invalid pic construct. */
13708 return false;
13709 #if TARGET_MACHO
13710 else if (MACHO_DYNAMIC_NO_PIC_P
13711 && !ix86_legitimate_constant_p (Pmode, disp))
13712 /* displacement must be referenced via non_lazy_pointer */
13713 return false;
13714 #endif
13716 /* This code used to verify that a symbolic pic displacement
13717 includes the pic_offset_table_rtx register.
13719 While this is a good idea, unfortunately these constructs may
13720 be created by the "adds using lea" optimization for incorrect
13721 code like:
13723 int a;
13724 int foo(int i)
13725 {
13726 return *(&a+i);
13727 }
13729 This code is nonsensical, but results in addressing the
13730 GOT table with a pic_offset_table_rtx base. We can't
13731 just refuse it easily, since it gets matched by the
13732 "addsi3" pattern, which later gets split to lea in case
13733 the output register differs from the input. While this
13734 could be handled by a separate addsi pattern for this case
13735 that never results in lea, disabling this test seems to be
13736 the easier and correct fix for the crash. */
13738 else if (GET_CODE (disp) != LABEL_REF
13739 && !CONST_INT_P (disp)
13740 && (GET_CODE (disp) != CONST
13741 || !ix86_legitimate_constant_p (Pmode, disp))
13742 && (GET_CODE (disp) != SYMBOL_REF
13743 || !ix86_legitimate_constant_p (Pmode, disp)))
13744 /* Displacement is not constant. */
13745 return false;
13746 else if (TARGET_64BIT
13747 && !x86_64_immediate_operand (disp, VOIDmode))
13748 /* Displacement is out of range. */
13749 return false;
13750 /* In x32 mode, constant addresses are sign extended to 64bit, so
13751 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13752 else if (TARGET_X32 && !(index || base)
13753 && CONST_INT_P (disp)
13754 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13755 return false;
13758 /* Everything looks valid. */
13759 return true;
13762 /* Determine if a given RTX is a valid constant address. */
13764 bool
13765 constant_address_p (rtx x)
13767 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13770 /* Return a unique alias set for the GOT. */
13772 static alias_set_type
13773 ix86_GOT_alias_set (void)
13775 static alias_set_type set = -1;
13776 if (set == -1)
13777 set = new_alias_set ();
13778 return set;
13781 /* Return a legitimate reference for ORIG (an address) using the
13782 register REG. If REG is 0, a new pseudo is generated.
13784 There are two types of references that must be handled:
13786 1. Global data references must load the address from the GOT, via
13787 the PIC reg. An insn is emitted to do this load, and the reg is
13788 returned.
13790 2. Static data references, constant pool addresses, and code labels
13791 compute the address as an offset from the GOT, whose base is in
13792 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13793 differentiate them from global data objects. The returned
13794 address is the PIC reg + an unspec constant.
13796 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13797 reg also appears in the address. */
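/* Illustratively (32-bit case, sketch only): a global symbol "x" is
   loaded from (mem (plus pic_reg (const (unspec [x] UNSPEC_GOT)))),
   while a local symbol "y" is addressed directly as
   (plus pic_reg (const (unspec [y] UNSPEC_GOTOFF))) with no load.  */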
13799 static rtx
13800 legitimize_pic_address (rtx orig, rtx reg)
13802 rtx addr = orig;
13803 rtx new_rtx = orig;
13805 #if TARGET_MACHO
13806 if (TARGET_MACHO && !TARGET_64BIT)
13808 if (reg == 0)
13809 reg = gen_reg_rtx (Pmode);
13810 /* Use the generic Mach-O PIC machinery. */
13811 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13813 #endif
13815 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13817 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13818 if (tmp)
13819 return tmp;
13822 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13823 new_rtx = addr;
13824 else if (TARGET_64BIT && !TARGET_PECOFF
13825 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13827 rtx tmpreg;
13828 /* This symbol may be referenced via a displacement from the PIC
13829 base address (@GOTOFF). */
13831 if (GET_CODE (addr) == CONST)
13832 addr = XEXP (addr, 0);
13833 if (GET_CODE (addr) == PLUS)
13835 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13836 UNSPEC_GOTOFF);
13837 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13839 else
13840 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13841 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13842 if (!reg)
13843 tmpreg = gen_reg_rtx (Pmode);
13844 else
13845 tmpreg = reg;
13846 emit_move_insn (tmpreg, new_rtx);
13848 if (reg != 0)
13850 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13851 tmpreg, 1, OPTAB_DIRECT);
13852 new_rtx = reg;
13854 else
13855 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13857 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13859 /* This symbol may be referenced via a displacement from the PIC
13860 base address (@GOTOFF). */
13862 if (GET_CODE (addr) == CONST)
13863 addr = XEXP (addr, 0);
13864 if (GET_CODE (addr) == PLUS)
13866 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13867 UNSPEC_GOTOFF);
13868 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13870 else
13871 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13872 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13873 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13875 if (reg != 0)
13877 emit_move_insn (reg, new_rtx);
13878 new_rtx = reg;
13881 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13882 /* We can't use @GOTOFF for text labels on VxWorks;
13883 see gotoff_operand. */
13884 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13886 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13887 if (tmp)
13888 return tmp;
13890 /* For x64 PE-COFF there is no GOT table, so we use the address
13891 directly. */
13892 if (TARGET_64BIT && TARGET_PECOFF)
13894 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13895 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13897 if (reg == 0)
13898 reg = gen_reg_rtx (Pmode);
13899 emit_move_insn (reg, new_rtx);
13900 new_rtx = reg;
13902 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13904 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13905 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13906 new_rtx = gen_const_mem (Pmode, new_rtx);
13907 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13909 if (reg == 0)
13910 reg = gen_reg_rtx (Pmode);
13911 /* Use gen_movsi directly, otherwise the address is loaded
13912 into a register for CSE. We don't want to CSE these addresses;
13913 instead we CSE addresses from the GOT table, so skip this. */
13914 emit_insn (gen_movsi (reg, new_rtx));
13915 new_rtx = reg;
13917 else
13919 /* This symbol must be referenced via a load from the
13920 Global Offset Table (@GOT). */
13922 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13923 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13924 if (TARGET_64BIT)
13925 new_rtx = force_reg (Pmode, new_rtx);
13926 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13927 new_rtx = gen_const_mem (Pmode, new_rtx);
13928 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13930 if (reg == 0)
13931 reg = gen_reg_rtx (Pmode);
13932 emit_move_insn (reg, new_rtx);
13933 new_rtx = reg;
13936 else
13938 if (CONST_INT_P (addr)
13939 && !x86_64_immediate_operand (addr, VOIDmode))
13941 if (reg)
13943 emit_move_insn (reg, addr);
13944 new_rtx = reg;
13946 else
13947 new_rtx = force_reg (Pmode, addr);
13949 else if (GET_CODE (addr) == CONST)
13951 addr = XEXP (addr, 0);
13953 /* We must match stuff we generate before. Assume the only
13954 unspecs that can get here are ours. Not that we could do
13955 anything with them anyway.... */
13956 if (GET_CODE (addr) == UNSPEC
13957 || (GET_CODE (addr) == PLUS
13958 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13959 return orig;
13960 gcc_assert (GET_CODE (addr) == PLUS);
13962 if (GET_CODE (addr) == PLUS)
13964 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13966 /* Check first to see if this is a constant offset from a @GOTOFF
13967 symbol reference. */
13968 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13969 && CONST_INT_P (op1))
13971 if (!TARGET_64BIT)
13973 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13974 UNSPEC_GOTOFF);
13975 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13976 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13977 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13979 if (reg != 0)
13981 emit_move_insn (reg, new_rtx);
13982 new_rtx = reg;
13985 else
13987 if (INTVAL (op1) < -16*1024*1024
13988 || INTVAL (op1) >= 16*1024*1024)
13990 if (!x86_64_immediate_operand (op1, Pmode))
13991 op1 = force_reg (Pmode, op1);
13992 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13996 else
13998 rtx base = legitimize_pic_address (op0, reg);
13999 machine_mode mode = GET_MODE (base);
14000 new_rtx
14001 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
14003 if (CONST_INT_P (new_rtx))
14005 if (INTVAL (new_rtx) < -16*1024*1024
14006 || INTVAL (new_rtx) >= 16*1024*1024)
14008 if (!x86_64_immediate_operand (new_rtx, mode))
14009 new_rtx = force_reg (mode, new_rtx);
14010 new_rtx
14011 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
14013 else
14014 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
14016 else
14018 /* For %rip addressing, we have to use just disp32, not
14019 base nor index. */
14020 if (TARGET_64BIT
14021 && (GET_CODE (base) == SYMBOL_REF
14022 || GET_CODE (base) == LABEL_REF))
14023 base = force_reg (mode, base);
14024 if (GET_CODE (new_rtx) == PLUS
14025 && CONSTANT_P (XEXP (new_rtx, 1)))
14027 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
14028 new_rtx = XEXP (new_rtx, 1);
14030 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
14035 return new_rtx;
14038 /* Load the thread pointer. If TO_REG is true, force it into a register. */
14040 static rtx
14041 get_thread_pointer (machine_mode tp_mode, bool to_reg)
14043 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
14045 if (GET_MODE (tp) != tp_mode)
14047 gcc_assert (GET_MODE (tp) == SImode);
14048 gcc_assert (tp_mode == DImode);
14050 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
14053 if (to_reg)
14054 tp = copy_to_mode_reg (tp_mode, tp);
14056 return tp;
14059 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14061 static GTY(()) rtx ix86_tls_symbol;
14063 static rtx
14064 ix86_tls_get_addr (void)
14066 if (!ix86_tls_symbol)
14068 const char *sym
14069 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
14070 ? "___tls_get_addr" : "__tls_get_addr");
14072 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
14075 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
14077 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
14078 UNSPEC_PLTOFF);
14079 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
14080 gen_rtx_CONST (Pmode, unspec));
14083 return ix86_tls_symbol;
14086 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14088 static GTY(()) rtx ix86_tls_module_base_symbol;
14090 rtx
14091 ix86_tls_module_base (void)
14093 if (!ix86_tls_module_base_symbol)
14095 ix86_tls_module_base_symbol
14096 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
14098 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14099 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14102 return ix86_tls_module_base_symbol;
14105 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
14106 false if we expect this to be used for a memory address and true if
14107 we expect to load the address into a register. */
14109 static rtx
14110 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
14112 rtx dest, base, off;
14113 rtx pic = NULL_RTX, tp = NULL_RTX;
14114 machine_mode tp_mode = Pmode;
14115 int type;
14117 /* Fall back to global dynamic model if tool chain cannot support local
14118 dynamic. */
14119 if (TARGET_SUN_TLS && !TARGET_64BIT
14120 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
14121 && model == TLS_MODEL_LOCAL_DYNAMIC)
14122 model = TLS_MODEL_GLOBAL_DYNAMIC;
14124 switch (model)
14126 case TLS_MODEL_GLOBAL_DYNAMIC:
14127 dest = gen_reg_rtx (Pmode);
14129 if (!TARGET_64BIT)
14131 if (flag_pic && !TARGET_PECOFF)
14132 pic = pic_offset_table_rtx;
14133 else
14135 pic = gen_reg_rtx (Pmode);
14136 emit_insn (gen_set_got (pic));
14140 if (TARGET_GNU2_TLS)
14142 if (TARGET_64BIT)
14143 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14144 else
14145 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14147 tp = get_thread_pointer (Pmode, true);
14148 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14150 if (GET_MODE (x) != Pmode)
14151 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14153 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14155 else
14157 rtx caddr = ix86_tls_get_addr ();
14159 if (TARGET_64BIT)
14161 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14162 rtx_insn *insns;
14164 start_sequence ();
14165 emit_call_insn
14166 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14167 insns = get_insns ();
14168 end_sequence ();
14170 if (GET_MODE (x) != Pmode)
14171 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14173 RTL_CONST_CALL_P (insns) = 1;
14174 emit_libcall_block (insns, dest, rax, x);
14176 else
14177 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14179 break;
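    /* Local dynamic: a single __tls_get_addr call (or TLS descriptor)
       yields the base of the module's TLS block; individual variables are
       then reached via constant @dtpoff offsets from that base, so several
       accesses can share one call.  */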
14181 case TLS_MODEL_LOCAL_DYNAMIC:
14182 base = gen_reg_rtx (Pmode);
14184 if (!TARGET_64BIT)
14186 if (flag_pic)
14187 pic = pic_offset_table_rtx;
14188 else
14190 pic = gen_reg_rtx (Pmode);
14191 emit_insn (gen_set_got (pic));
14195 if (TARGET_GNU2_TLS)
14197 rtx tmp = ix86_tls_module_base ();
14199 if (TARGET_64BIT)
14200 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14201 else
14202 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14204 tp = get_thread_pointer (Pmode, true);
14205 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14206 gen_rtx_MINUS (Pmode, tmp, tp));
14208 else
14210 rtx caddr = ix86_tls_get_addr ();
14212 if (TARGET_64BIT)
14214 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14215 rtx_insn *insns;
14216 rtx eqv;
14218 start_sequence ();
14219 emit_call_insn
14220 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14221 insns = get_insns ();
14222 end_sequence ();
14224 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14225 share the LD_BASE result with other LD model accesses. */
14226 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14227 UNSPEC_TLS_LD_BASE);
14229 RTL_CONST_CALL_P (insns) = 1;
14230 emit_libcall_block (insns, base, rax, eqv);
14232 else
14233 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14236 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14237 off = gen_rtx_CONST (Pmode, off);
14239 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14241 if (TARGET_GNU2_TLS)
14243 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14245 if (GET_MODE (x) != Pmode)
14246 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14248 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14250 break;
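    /* Initial exec: the offset of the variable from the thread pointer is
       loaded from the GOT (@gottpoff and friends) and combined with the
       thread pointer at run time.  */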
14252 case TLS_MODEL_INITIAL_EXEC:
14253 if (TARGET_64BIT)
14255 if (TARGET_SUN_TLS && !TARGET_X32)
14257 /* The Sun linker took the AMD64 TLS spec literally
14258 and can only handle %rax as destination of the
14259 initial executable code sequence. */
14261 dest = gen_reg_rtx (DImode);
14262 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14263 return dest;
14266 /* Generate DImode references to avoid %fs:(%reg32)
14267 problems and linker IE->LE relaxation bug. */
14268 tp_mode = DImode;
14269 pic = NULL;
14270 type = UNSPEC_GOTNTPOFF;
14272 else if (flag_pic)
14274 pic = pic_offset_table_rtx;
14275 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14277 else if (!TARGET_ANY_GNU_TLS)
14279 pic = gen_reg_rtx (Pmode);
14280 emit_insn (gen_set_got (pic));
14281 type = UNSPEC_GOTTPOFF;
14283 else
14285 pic = NULL;
14286 type = UNSPEC_INDNTPOFF;
14289 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14290 off = gen_rtx_CONST (tp_mode, off);
14291 if (pic)
14292 off = gen_rtx_PLUS (tp_mode, pic, off);
14293 off = gen_const_mem (tp_mode, off);
14294 set_mem_alias_set (off, ix86_GOT_alias_set ());
14296 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14298 base = get_thread_pointer (tp_mode,
14299 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14300 off = force_reg (tp_mode, off);
14301 return gen_rtx_PLUS (tp_mode, base, off);
14303 else
14305 base = get_thread_pointer (Pmode, true);
14306 dest = gen_reg_rtx (Pmode);
14307 emit_insn (ix86_gen_sub3 (dest, base, off));
14309 break;
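    /* Local exec: the offset from the thread pointer is a link-time
       constant (@tpoff / @ntpoff), so no GOT access is needed at all.  */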
14311 case TLS_MODEL_LOCAL_EXEC:
14312 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14313 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14314 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14315 off = gen_rtx_CONST (Pmode, off);
14317 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14319 base = get_thread_pointer (Pmode,
14320 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14321 return gen_rtx_PLUS (Pmode, base, off);
14323 else
14325 base = get_thread_pointer (Pmode, true);
14326 dest = gen_reg_rtx (Pmode);
14327 emit_insn (ix86_gen_sub3 (dest, base, off));
14329 break;
14331 default:
14332 gcc_unreachable ();
14335 return dest;
14338 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14339 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14340 unique refptr-DECL symbol corresponding to symbol DECL. */
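/* Hasher for the cache below, which maps a decl to the artificial VAR_DECL
   that provides its __imp_/refptr indirection.  */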
14342 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
14344 static inline hashval_t hash (tree_map *m) { return m->hash; }
14345 static inline bool
14346 equal (tree_map *a, tree_map *b)
14348 return a->base.from == b->base.from;
14351 static int
14352 keep_cache_entry (tree_map *&m)
14354 return ggc_marked_p (m->base.from);
14358 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14360 static tree
14361 get_dllimport_decl (tree decl, bool beimport)
14363 struct tree_map *h, in;
14364 const char *name;
14365 const char *prefix;
14366 size_t namelen, prefixlen;
14367 char *imp_name;
14368 tree to;
14369 rtx rtl;
14371 if (!dllimport_map)
14372 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14374 in.hash = htab_hash_pointer (decl);
14375 in.base.from = decl;
14376 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14377 h = *loc;
14378 if (h)
14379 return h->to;
14381 *loc = h = ggc_alloc<tree_map> ();
14382 h->hash = in.hash;
14383 h->base.from = decl;
14384 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14385 VAR_DECL, NULL, ptr_type_node);
14386 DECL_ARTIFICIAL (to) = 1;
14387 DECL_IGNORED_P (to) = 1;
14388 DECL_EXTERNAL (to) = 1;
14389 TREE_READONLY (to) = 1;
14391 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14392 name = targetm.strip_name_encoding (name);
14393 if (beimport)
14394 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14395 ? "*__imp_" : "*__imp__";
14396 else
14397 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14398 namelen = strlen (name);
14399 prefixlen = strlen (prefix);
14400 imp_name = (char *) alloca (namelen + prefixlen + 1);
14401 memcpy (imp_name, prefix, prefixlen);
14402 memcpy (imp_name + prefixlen, name, namelen + 1);
14404 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14405 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14406 SET_SYMBOL_REF_DECL (rtl, to);
14407 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14408 if (!beimport)
14410 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14411 #ifdef SUB_TARGET_RECORD_STUB
14412 SUB_TARGET_RECORD_STUB (name);
14413 #endif
14416 rtl = gen_const_mem (Pmode, rtl);
14417 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14419 SET_DECL_RTL (to, rtl);
14420 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14422 return to;
14425 /* Expand SYMBOL into its corresponding far-address symbol.
14426 WANT_REG is true if we require the result to be a register. */
14428 static rtx
14429 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14431 tree imp_decl;
14432 rtx x;
14434 gcc_assert (SYMBOL_REF_DECL (symbol));
14435 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14437 x = DECL_RTL (imp_decl);
14438 if (want_reg)
14439 x = force_reg (Pmode, x);
14440 return x;
14443 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14444 true if we require the result to be a register. */
14446 static rtx
14447 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14449 tree imp_decl;
14450 rtx x;
14452 gcc_assert (SYMBOL_REF_DECL (symbol));
14453 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14455 x = DECL_RTL (imp_decl);
14456 if (want_reg)
14457 x = force_reg (Pmode, x);
14458 return x;
14461 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
14462 is true if we require the result to be a register. */
14464 static rtx
14465 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14467 if (!TARGET_PECOFF)
14468 return NULL_RTX;
14470 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14472 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14473 return legitimize_dllimport_symbol (addr, inreg);
14474 if (GET_CODE (addr) == CONST
14475 && GET_CODE (XEXP (addr, 0)) == PLUS
14476 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14477 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14479 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14480 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14484 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14485 return NULL_RTX;
14486 if (GET_CODE (addr) == SYMBOL_REF
14487 && !is_imported_p (addr)
14488 && SYMBOL_REF_EXTERNAL_P (addr)
14489 && SYMBOL_REF_DECL (addr))
14490 return legitimize_pe_coff_extern_decl (addr, inreg);
14492 if (GET_CODE (addr) == CONST
14493 && GET_CODE (XEXP (addr, 0)) == PLUS
14494 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14495 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14496 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14497 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14499 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14500 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14502 return NULL_RTX;
14505 /* Try machine-dependent ways of modifying an illegitimate address
14506 to be legitimate. If we find one, return the new, valid address.
14507 This macro is used in only one place: `memory_address' in explow.c.
14509 OLDX is the address as it was before break_out_memory_refs was called.
14510 In some cases it is useful to look at this to decide what needs to be done.
14512 It is always safe for this macro to do nothing. It exists to recognize
14513 opportunities to optimize the output.
14515 For the 80386, we handle X+REG by loading X into a register R and
14516 using R+REG. R will go in a general reg and indexing will be used.
14517 However, if REG is a broken-out memory address or multiplication,
14518 nothing needs to be done because REG can certainly go in a general reg.
14520 When -fpic is used, special handling is needed for symbolic references.
14521 See comments by legitimize_pic_address in i386.c for details. */
14523 static rtx
14524 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14526 bool changed = false;
14527 unsigned log;
14529 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14530 if (log)
14531 return legitimize_tls_address (x, (enum tls_model) log, false);
14532 if (GET_CODE (x) == CONST
14533 && GET_CODE (XEXP (x, 0)) == PLUS
14534 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14535 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14537 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14538 (enum tls_model) log, false);
14539 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14542 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14544 rtx tmp = legitimize_pe_coff_symbol (x, true);
14545 if (tmp)
14546 return tmp;
14549 if (flag_pic && SYMBOLIC_CONST (x))
14550 return legitimize_pic_address (x, 0);
14552 #if TARGET_MACHO
14553 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14554 return machopic_indirect_data_reference (x, 0);
14555 #endif
14557 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14558 if (GET_CODE (x) == ASHIFT
14559 && CONST_INT_P (XEXP (x, 1))
14560 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14562 changed = true;
14563 log = INTVAL (XEXP (x, 1));
14564 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14565 GEN_INT (1 << log));
14568 if (GET_CODE (x) == PLUS)
14570 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14572 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14573 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14574 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14576 changed = true;
14577 log = INTVAL (XEXP (XEXP (x, 0), 1));
14578 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14579 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14580 GEN_INT (1 << log));
14583 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14584 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14585 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14587 changed = true;
14588 log = INTVAL (XEXP (XEXP (x, 1), 1));
14589 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14590 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14591 GEN_INT (1 << log));
14594 /* Put multiply first if it isn't already. */
14595 if (GET_CODE (XEXP (x, 1)) == MULT)
14597 std::swap (XEXP (x, 0), XEXP (x, 1));
14598 changed = true;
14601 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14602 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14603 created by virtual register instantiation, register elimination, and
14604 similar optimizations. */
14605 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14607 changed = true;
14608 x = gen_rtx_PLUS (Pmode,
14609 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14610 XEXP (XEXP (x, 1), 0)),
14611 XEXP (XEXP (x, 1), 1));
14614 /* Canonicalize
14615 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14616 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14617 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14618 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14619 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14620 && CONSTANT_P (XEXP (x, 1)))
14622 rtx constant;
14623 rtx other = NULL_RTX;
14625 if (CONST_INT_P (XEXP (x, 1)))
14627 constant = XEXP (x, 1);
14628 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14630 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14632 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14633 other = XEXP (x, 1);
14635 else
14636 constant = 0;
14638 if (constant)
14640 changed = true;
14641 x = gen_rtx_PLUS (Pmode,
14642 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14643 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14644 plus_constant (Pmode, other,
14645 INTVAL (constant)));
14649 if (changed && ix86_legitimate_address_p (mode, x, false))
14650 return x;
14652 if (GET_CODE (XEXP (x, 0)) == MULT)
14654 changed = true;
14655 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14658 if (GET_CODE (XEXP (x, 1)) == MULT)
14660 changed = true;
14661 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14664 if (changed
14665 && REG_P (XEXP (x, 1))
14666 && REG_P (XEXP (x, 0)))
14667 return x;
14669 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14671 changed = true;
14672 x = legitimize_pic_address (x, 0);
14675 if (changed && ix86_legitimate_address_p (mode, x, false))
14676 return x;
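  /* Otherwise force whichever operand of the PLUS is not already a
     register into one, so the address becomes a simple reg+reg form.  */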
14678 if (REG_P (XEXP (x, 0)))
14680 rtx temp = gen_reg_rtx (Pmode);
14681 rtx val = force_operand (XEXP (x, 1), temp);
14682 if (val != temp)
14684 val = convert_to_mode (Pmode, val, 1);
14685 emit_move_insn (temp, val);
14688 XEXP (x, 1) = temp;
14689 return x;
14692 else if (REG_P (XEXP (x, 1)))
14694 rtx temp = gen_reg_rtx (Pmode);
14695 rtx val = force_operand (XEXP (x, 0), temp);
14696 if (val != temp)
14698 val = convert_to_mode (Pmode, val, 1);
14699 emit_move_insn (temp, val);
14702 XEXP (x, 0) = temp;
14703 return x;
14707 return x;
14710 /* Print an integer constant expression in assembler syntax. Addition
14711 and subtraction are the only arithmetic that may appear in these
14712 expressions. FILE is the stdio stream to write to, X is the rtx, and
14713 CODE is the operand print code from the output string. */
14715 static void
14716 output_pic_addr_const (FILE *file, rtx x, int code)
14718 char buf[256];
14720 switch (GET_CODE (x))
14722 case PC:
14723 gcc_assert (flag_pic);
14724 putc ('.', file);
14725 break;
14727 case SYMBOL_REF:
14728 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14729 output_addr_const (file, x);
14730 else
14732 const char *name = XSTR (x, 0);
14734 /* Mark the decl as referenced so that cgraph will
14735 output the function. */
14736 if (SYMBOL_REF_DECL (x))
14737 mark_decl_referenced (SYMBOL_REF_DECL (x));
14739 #if TARGET_MACHO
14740 if (MACHOPIC_INDIRECT
14741 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14742 name = machopic_indirection_name (x, /*stub_p=*/true);
14743 #endif
14744 assemble_name (file, name);
14746 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14747 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14748 fputs ("@PLT", file);
14749 break;
14751 case LABEL_REF:
14752 x = XEXP (x, 0);
14753 /* FALLTHRU */
14754 case CODE_LABEL:
14755 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14756 assemble_name (asm_out_file, buf);
14757 break;
14759 case CONST_INT:
14760 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14761 break;
14763 case CONST:
14764 /* This used to output parentheses around the expression,
14765 but that does not work on the 386 (either ATT or BSD assembler). */
14766 output_pic_addr_const (file, XEXP (x, 0), code);
14767 break;
14769 case CONST_DOUBLE:
14770 /* We can't handle floating point constants;
14771 TARGET_PRINT_OPERAND must handle them. */
14772 output_operand_lossage ("floating constant misused");
14773 break;
14775 case PLUS:
14776 /* Some assemblers need integer constants to appear first. */
14777 if (CONST_INT_P (XEXP (x, 0)))
14779 output_pic_addr_const (file, XEXP (x, 0), code);
14780 putc ('+', file);
14781 output_pic_addr_const (file, XEXP (x, 1), code);
14783 else
14785 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14786 output_pic_addr_const (file, XEXP (x, 1), code);
14787 putc ('+', file);
14788 output_pic_addr_const (file, XEXP (x, 0), code);
14790 break;
14792 case MINUS:
14793 if (!TARGET_MACHO)
14794 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14795 output_pic_addr_const (file, XEXP (x, 0), code);
14796 putc ('-', file);
14797 output_pic_addr_const (file, XEXP (x, 1), code);
14798 if (!TARGET_MACHO)
14799 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14800 break;
14802 case UNSPEC:
14803 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14805 bool f = i386_asm_output_addr_const_extra (file, x);
14806 gcc_assert (f);
14807 break;
14810 gcc_assert (XVECLEN (x, 0) == 1);
14811 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14812 switch (XINT (x, 1))
14814 case UNSPEC_GOT:
14815 fputs ("@GOT", file);
14816 break;
14817 case UNSPEC_GOTOFF:
14818 fputs ("@GOTOFF", file);
14819 break;
14820 case UNSPEC_PLTOFF:
14821 fputs ("@PLTOFF", file);
14822 break;
14823 case UNSPEC_PCREL:
14824 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14825 "(%rip)" : "[rip]", file);
14826 break;
14827 case UNSPEC_GOTPCREL:
14828 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14829 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14830 break;
14831 case UNSPEC_GOTTPOFF:
14832 /* FIXME: This might be @TPOFF in Sun ld too. */
14833 fputs ("@gottpoff", file);
14834 break;
14835 case UNSPEC_TPOFF:
14836 fputs ("@tpoff", file);
14837 break;
14838 case UNSPEC_NTPOFF:
14839 if (TARGET_64BIT)
14840 fputs ("@tpoff", file);
14841 else
14842 fputs ("@ntpoff", file);
14843 break;
14844 case UNSPEC_DTPOFF:
14845 fputs ("@dtpoff", file);
14846 break;
14847 case UNSPEC_GOTNTPOFF:
14848 if (TARGET_64BIT)
14849 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14850 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14851 else
14852 fputs ("@gotntpoff", file);
14853 break;
14854 case UNSPEC_INDNTPOFF:
14855 fputs ("@indntpoff", file);
14856 break;
14857 #if TARGET_MACHO
14858 case UNSPEC_MACHOPIC_OFFSET:
14859 putc ('-', file);
14860 machopic_output_function_base_name (file);
14861 break;
14862 #endif
14863 default:
14864 output_operand_lossage ("invalid UNSPEC as operand");
14865 break;
14867 break;
14869 default:
14870 output_operand_lossage ("invalid expression as operand");
14874 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14875 We need to emit DTP-relative relocations. */
14877 static void ATTRIBUTE_UNUSED
14878 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14880 fputs (ASM_LONG, file);
14881 output_addr_const (file, x);
14882 fputs ("@dtpoff", file);
14883 switch (size)
14885 case 4:
14886 break;
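      /* For an 8-byte value, emit the 32-bit @dtpoff in the low word and
	 pad the upper word with zero.  */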
14887 case 8:
14888 fputs (", 0", file);
14889 break;
14890 default:
14891 gcc_unreachable ();
14895 /* Return true if X is a representation of the PIC register. This copes
14896 with calls from ix86_find_base_term, where the register might have
14897 been replaced by a cselib value. */
14899 static bool
14900 ix86_pic_register_p (rtx x)
14902 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14903 return (pic_offset_table_rtx
14904 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14905 else if (!REG_P (x))
14906 return false;
14907 else if (pic_offset_table_rtx)
14909 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14910 return true;
14911 if (HARD_REGISTER_P (x)
14912 && !HARD_REGISTER_P (pic_offset_table_rtx)
14913 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14914 return true;
14915 return false;
14917 else
14918 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14921 /* Helper function for ix86_delegitimize_address.
14922 Attempt to delegitimize TLS local-exec accesses. */
14924 static rtx
14925 ix86_delegitimize_tls_address (rtx orig_x)
14927 rtx x = orig_x, unspec;
14928 struct ix86_address addr;
14930 if (!TARGET_TLS_DIRECT_SEG_REFS)
14931 return orig_x;
14932 if (MEM_P (x))
14933 x = XEXP (x, 0);
14934 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14935 return orig_x;
14936 if (ix86_decompose_address (x, &addr) == 0
14937 || addr.seg != DEFAULT_TLS_SEG_REG
14938 || addr.disp == NULL_RTX
14939 || GET_CODE (addr.disp) != CONST)
14940 return orig_x;
14941 unspec = XEXP (addr.disp, 0);
14942 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14943 unspec = XEXP (unspec, 0);
14944 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14945 return orig_x;
14946 x = XVECEXP (unspec, 0, 0);
14947 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14948 if (unspec != XEXP (addr.disp, 0))
14949 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14950 if (addr.index)
14952 rtx idx = addr.index;
14953 if (addr.scale != 1)
14954 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14955 x = gen_rtx_PLUS (Pmode, idx, x);
14957 if (addr.base)
14958 x = gen_rtx_PLUS (Pmode, addr.base, x);
14959 if (MEM_P (orig_x))
14960 x = replace_equiv_address_nv (orig_x, x);
14961 return x;
14964 /* In the name of slightly smaller debug output, and to cater to
14965 general assembler lossage, recognize PIC+GOTOFF and turn it back
14966 into a direct symbol reference.
14968 On Darwin, this is necessary to avoid a crash, because Darwin
14969 has a different PIC label for each routine but the DWARF debugging
14970 information is not associated with any particular routine, so it's
14971 necessary to remove references to the PIC label from RTL stored by
14972 the DWARF output code. */
14974 static rtx
14975 ix86_delegitimize_address (rtx x)
14977 rtx orig_x = delegitimize_mem_from_attrs (x);
14978 /* addend is NULL or some rtx if x is something+GOTOFF where
14979 something doesn't include the PIC register. */
14980 rtx addend = NULL_RTX;
14981 /* reg_addend is NULL or a multiple of some register. */
14982 rtx reg_addend = NULL_RTX;
14983 /* const_addend is NULL or a const_int. */
14984 rtx const_addend = NULL_RTX;
14985 /* This is the result, or NULL. */
14986 rtx result = NULL_RTX;
14988 x = orig_x;
14990 if (MEM_P (x))
14991 x = XEXP (x, 0);
14993 if (TARGET_64BIT)
14995 if (GET_CODE (x) == CONST
14996 && GET_CODE (XEXP (x, 0)) == PLUS
14997 && GET_MODE (XEXP (x, 0)) == Pmode
14998 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14999 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
15000 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
15002 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
15003 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
15004 if (MEM_P (orig_x))
15005 x = replace_equiv_address_nv (orig_x, x);
15006 return x;
15009 if (GET_CODE (x) == CONST
15010 && GET_CODE (XEXP (x, 0)) == UNSPEC
15011 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
15012 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
15013 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
15015 x = XVECEXP (XEXP (x, 0), 0, 0);
15016 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
15018 x = simplify_gen_subreg (GET_MODE (orig_x), x,
15019 GET_MODE (x), 0);
15020 if (x == NULL_RTX)
15021 return orig_x;
15023 return x;
15026 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
15027 return ix86_delegitimize_tls_address (orig_x);
15029 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
15030 and -mcmodel=medium -fpic. */
15033 if (GET_CODE (x) != PLUS
15034 || GET_CODE (XEXP (x, 1)) != CONST)
15035 return ix86_delegitimize_tls_address (orig_x);
15037 if (ix86_pic_register_p (XEXP (x, 0)))
15038 /* %ebx + GOT/GOTOFF */
15040 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15042 /* %ebx + %reg * scale + GOT/GOTOFF */
15043 reg_addend = XEXP (x, 0);
15044 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
15045 reg_addend = XEXP (reg_addend, 1);
15046 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
15047 reg_addend = XEXP (reg_addend, 0);
15048 else
15050 reg_addend = NULL_RTX;
15051 addend = XEXP (x, 0);
15054 else
15055 addend = XEXP (x, 0);
15057 x = XEXP (XEXP (x, 1), 0);
15058 if (GET_CODE (x) == PLUS
15059 && CONST_INT_P (XEXP (x, 1)))
15061 const_addend = XEXP (x, 1);
15062 x = XEXP (x, 0);
15065 if (GET_CODE (x) == UNSPEC
15066 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
15067 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
15068 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
15069 && !MEM_P (orig_x) && !addend)))
15070 result = XVECEXP (x, 0, 0);
15072 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
15073 && !MEM_P (orig_x))
15074 result = XVECEXP (x, 0, 0);
15076 if (! result)
15077 return ix86_delegitimize_tls_address (orig_x);
15079 if (const_addend)
15080 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
15081 if (reg_addend)
15082 result = gen_rtx_PLUS (Pmode, reg_addend, result);
15083 if (addend)
15085 /* If the rest of original X doesn't involve the PIC register, add
15086 addend and subtract pic_offset_table_rtx. This can happen e.g.
15087 for code like:
15088 leal (%ebx, %ecx, 4), %ecx
15090 movl foo@GOTOFF(%ecx), %edx
15091 in which case we return (%ecx - %ebx) + foo
15092 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
15093 and reload has completed. */
15094 if (pic_offset_table_rtx
15095 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
15096 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
15097 pic_offset_table_rtx),
15098 result);
15099 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
15101 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
15102 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
15103 result = gen_rtx_PLUS (Pmode, tmp, result);
15105 else
15106 return orig_x;
15108 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
15110 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
15111 if (result == NULL_RTX)
15112 return orig_x;
15114 return result;
15117 /* If X is a machine specific address (i.e. a symbol or label being
15118 referenced as a displacement from the GOT implemented using an
15119 UNSPEC), then return the base term. Otherwise return X. */
15121 rtx
15122 ix86_find_base_term (rtx x)
15124 rtx term;
15126 if (TARGET_64BIT)
15128 if (GET_CODE (x) != CONST)
15129 return x;
15130 term = XEXP (x, 0);
15131 if (GET_CODE (term) == PLUS
15132 && CONST_INT_P (XEXP (term, 1)))
15133 term = XEXP (term, 0);
15134 if (GET_CODE (term) != UNSPEC
15135 || (XINT (term, 1) != UNSPEC_GOTPCREL
15136 && XINT (term, 1) != UNSPEC_PCREL))
15137 return x;
15139 return XVECEXP (term, 0, 0);
15142 return ix86_delegitimize_address (x);
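/* Print to FILE the condition-code suffix ("e", "g", "b", ...) for
   comparison CODE performed in MODE.  If REVERSE is true, print the
   suffix for the reversed condition.  FP selects the suffix variants
   used for floating-point compares where they differ.  */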
15145 static void
15146 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15147 bool fp, FILE *file)
15149 const char *suffix;
15151 if (mode == CCFPmode || mode == CCFPUmode)
15153 code = ix86_fp_compare_code_to_integer (code);
15154 mode = CCmode;
15156 if (reverse)
15157 code = reverse_condition (code);
15159 switch (code)
15161 case EQ:
15162 switch (mode)
15164 case CCAmode:
15165 suffix = "a";
15166 break;
15167 case CCCmode:
15168 suffix = "c";
15169 break;
15170 case CCOmode:
15171 suffix = "o";
15172 break;
15173 case CCPmode:
15174 suffix = "p";
15175 break;
15176 case CCSmode:
15177 suffix = "s";
15178 break;
15179 default:
15180 suffix = "e";
15181 break;
15183 break;
15184 case NE:
15185 switch (mode)
15187 case CCAmode:
15188 suffix = "na";
15189 break;
15190 case CCCmode:
15191 suffix = "nc";
15192 break;
15193 case CCOmode:
15194 suffix = "no";
15195 break;
15196 case CCPmode:
15197 suffix = "np";
15198 break;
15199 case CCSmode:
15200 suffix = "ns";
15201 break;
15202 default:
15203 suffix = "ne";
15204 break;
15206 break;
15207 case GT:
15208 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15209 suffix = "g";
15210 break;
15211 case GTU:
15212 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15213 Those same assemblers have the same but opposite lossage on cmov. */
15214 if (mode == CCmode)
15215 suffix = fp ? "nbe" : "a";
15216 else
15217 gcc_unreachable ();
15218 break;
15219 case LT:
15220 switch (mode)
15222 case CCNOmode:
15223 case CCGOCmode:
15224 suffix = "s";
15225 break;
15227 case CCmode:
15228 case CCGCmode:
15229 suffix = "l";
15230 break;
15232 default:
15233 gcc_unreachable ();
15235 break;
15236 case LTU:
15237 if (mode == CCmode)
15238 suffix = "b";
15239 else if (mode == CCCmode)
15240 suffix = fp ? "b" : "c";
15241 else
15242 gcc_unreachable ();
15243 break;
15244 case GE:
15245 switch (mode)
15247 case CCNOmode:
15248 case CCGOCmode:
15249 suffix = "ns";
15250 break;
15252 case CCmode:
15253 case CCGCmode:
15254 suffix = "ge";
15255 break;
15257 default:
15258 gcc_unreachable ();
15260 break;
15261 case GEU:
15262 if (mode == CCmode)
15263 suffix = "nb";
15264 else if (mode == CCCmode)
15265 suffix = fp ? "nb" : "nc";
15266 else
15267 gcc_unreachable ();
15268 break;
15269 case LE:
15270 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15271 suffix = "le";
15272 break;
15273 case LEU:
15274 if (mode == CCmode)
15275 suffix = "be";
15276 else
15277 gcc_unreachable ();
15278 break;
15279 case UNORDERED:
15280 suffix = fp ? "u" : "p";
15281 break;
15282 case ORDERED:
15283 suffix = fp ? "nu" : "np";
15284 break;
15285 default:
15286 gcc_unreachable ();
15288 fputs (suffix, file);
15291 /* Print the name of register X to FILE based on its machine mode and number.
15292 If CODE is 'w', pretend the mode is HImode.
15293 If CODE is 'b', pretend the mode is QImode.
15294 If CODE is 'k', pretend the mode is SImode.
15295 If CODE is 'q', pretend the mode is DImode.
15296 If CODE is 'x', pretend the mode is V4SFmode.
15297 If CODE is 't', pretend the mode is V8SFmode.
15298 If CODE is 'g', pretend the mode is V16SFmode.
15299 If CODE is 'h', pretend the reg is the 'high' byte register.
15300 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
15301 If CODE is 'd', duplicate the operand for AVX instruction.
15304 void
15305 print_reg (rtx x, int code, FILE *file)
15307 const char *reg;
15308 int msize;
15309 unsigned int regno;
15310 bool duplicated;
15312 if (ASSEMBLER_DIALECT == ASM_ATT)
15313 putc ('%', file);
15315 if (x == pc_rtx)
15317 gcc_assert (TARGET_64BIT);
15318 fputs ("rip", file);
15319 return;
15322 if (code == 'y' && STACK_TOP_P (x))
15324 fputs ("st(0)", file);
15325 return;
15328 if (code == 'w')
15329 msize = 2;
15330 else if (code == 'b')
15331 msize = 1;
15332 else if (code == 'k')
15333 msize = 4;
15334 else if (code == 'q')
15335 msize = 8;
15336 else if (code == 'h')
15337 msize = 0;
15338 else if (code == 'x')
15339 msize = 16;
15340 else if (code == 't')
15341 msize = 32;
15342 else if (code == 'g')
15343 msize = 64;
15344 else
15345 msize = GET_MODE_SIZE (GET_MODE (x));
15347 regno = true_regnum (x);
15349 gcc_assert (regno != ARG_POINTER_REGNUM
15350 && regno != FRAME_POINTER_REGNUM
15351 && regno != FLAGS_REG
15352 && regno != FPSR_REG
15353 && regno != FPCR_REG);
15355 duplicated = code == 'd' && TARGET_AVX;
15357 switch (msize)
15359 case 8:
15360 case 4:
15361 if (LEGACY_INT_REGNO_P (regno))
15362 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
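	/* FALLTHRU */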
15363 case 16:
15364 case 12:
15365 case 2:
15366 normal:
15367 reg = hi_reg_name[regno];
15368 break;
15369 case 1:
15370 if (regno >= ARRAY_SIZE (qi_reg_name))
15371 goto normal;
15372 reg = qi_reg_name[regno];
15373 break;
15374 case 0:
15375 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15376 goto normal;
15377 reg = qi_high_reg_name[regno];
15378 break;
15379 case 32:
15380 case 64:
15381 if (SSE_REGNO_P (regno))
15383 gcc_assert (!duplicated);
15384 putc (msize == 32 ? 'y' : 'z', file);
15385 reg = hi_reg_name[regno] + 1;
15386 break;
15388 goto normal;
15389 default:
15390 gcc_unreachable ();
15393 fputs (reg, file);
15395 /* Irritatingly, AMD extended registers use a
15396 different naming convention: "r%d[bwd]". */
15397 if (REX_INT_REGNO_P (regno))
15399 gcc_assert (TARGET_64BIT);
15400 switch (msize)
15402 case 0:
15403 error ("extended registers have no high halves");
15404 break;
15405 case 1:
15406 putc ('b', file);
15407 break;
15408 case 2:
15409 putc ('w', file);
15410 break;
15411 case 4:
15412 putc ('d', file);
15413 break;
15414 case 8:
15415 /* no suffix */
15416 break;
15417 default:
15418 error ("unsupported operand size for extended register");
15419 break;
15421 return;
15424 if (duplicated)
15426 if (ASSEMBLER_DIALECT == ASM_ATT)
15427 fprintf (file, ", %%%s", reg);
15428 else
15429 fprintf (file, ", %s", reg);
15433 /* Meaning of CODE:
15434 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15435 C -- print opcode suffix for set/cmov insn.
15436 c -- like C, but print reversed condition
15437 F,f -- likewise, but for floating-point.
15438 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15439 otherwise nothing
15440 R -- print embedded rounding and sae.
15441 r -- print only sae.
15442 z -- print the opcode suffix for the size of the current operand.
15443 Z -- likewise, with special suffixes for x87 instructions.
15444 * -- print a star (in certain assembler syntax)
15445 A -- print an absolute memory reference.
15446 E -- print address with DImode register names if TARGET_64BIT.
15447 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15448 s -- print a shift double count, followed by the assembler's argument
15449 delimiter.
15450 b -- print the QImode name of the register for the indicated operand.
15451 %b0 would print %al if operands[0] is reg 0.
15452 w -- likewise, print the HImode name of the register.
15453 k -- likewise, print the SImode name of the register.
15454 q -- likewise, print the DImode name of the register.
15455 x -- likewise, print the V4SFmode name of the register.
15456 t -- likewise, print the V8SFmode name of the register.
15457 g -- likewise, print the V16SFmode name of the register.
15458 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15459 y -- print "st(0)" instead of "st" as a register.
15460 d -- print duplicated register operand for AVX instruction.
15461 D -- print condition for SSE cmp instruction.
15462 P -- if PIC, print an @PLT suffix.
15463 p -- print raw symbol name.
15464 X -- don't print any sort of PIC '@' suffix for a symbol.
15465 & -- print some in-use local-dynamic symbol name.
15466 H -- print a memory address offset by 8; used for sse high-parts
15467 Y -- print condition for XOP pcom* instruction.
15468 + -- print a branch hint as 'cs' or 'ds' prefix
15469 ; -- print a semicolon (after prefixes due to bug in older gas).
15470 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15471 @ -- print a segment register of thread base pointer load
15472 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15473 ! -- print MPX prefix for jxx/call/ret instructions if required.
15476 void
15477 ix86_print_operand (FILE *file, rtx x, int code)
15479 if (code)
15481 switch (code)
15483 case 'A':
15484 switch (ASSEMBLER_DIALECT)
15486 case ASM_ATT:
15487 putc ('*', file);
15488 break;
15490 case ASM_INTEL:
15491 /* Intel syntax. For absolute addresses, registers should not
15492 be surrounded by brackets. */
15493 if (!REG_P (x))
15495 putc ('[', file);
15496 ix86_print_operand (file, x, 0);
15497 putc (']', file);
15498 return;
15500 break;
15502 default:
15503 gcc_unreachable ();
15506 ix86_print_operand (file, x, 0);
15507 return;
15509 case 'E':
15510 /* Wrap address in an UNSPEC to declare special handling. */
15511 if (TARGET_64BIT)
15512 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15514 output_address (x);
15515 return;
15517 case 'L':
15518 if (ASSEMBLER_DIALECT == ASM_ATT)
15519 putc ('l', file);
15520 return;
15522 case 'W':
15523 if (ASSEMBLER_DIALECT == ASM_ATT)
15524 putc ('w', file);
15525 return;
15527 case 'B':
15528 if (ASSEMBLER_DIALECT == ASM_ATT)
15529 putc ('b', file);
15530 return;
15532 case 'Q':
15533 if (ASSEMBLER_DIALECT == ASM_ATT)
15534 putc ('l', file);
15535 return;
15537 case 'S':
15538 if (ASSEMBLER_DIALECT == ASM_ATT)
15539 putc ('s', file);
15540 return;
15542 case 'T':
15543 if (ASSEMBLER_DIALECT == ASM_ATT)
15544 putc ('t', file);
15545 return;
15547 case 'O':
15548 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15549 if (ASSEMBLER_DIALECT != ASM_ATT)
15550 return;
15552 switch (GET_MODE_SIZE (GET_MODE (x)))
15554 case 2:
15555 putc ('w', file);
15556 break;
15558 case 4:
15559 putc ('l', file);
15560 break;
15562 case 8:
15563 putc ('q', file);
15564 break;
15566 default:
15567 output_operand_lossage
15568 ("invalid operand size for operand code 'O'");
15569 return;
15572 putc ('.', file);
15573 #endif
15574 return;
15576 case 'z':
15577 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15579 /* Opcodes don't get size suffixes when using Intel syntax. */
15580 if (ASSEMBLER_DIALECT == ASM_INTEL)
15581 return;
15583 switch (GET_MODE_SIZE (GET_MODE (x)))
15585 case 1:
15586 putc ('b', file);
15587 return;
15589 case 2:
15590 putc ('w', file);
15591 return;
15593 case 4:
15594 putc ('l', file);
15595 return;
15597 case 8:
15598 putc ('q', file);
15599 return;
15601 default:
15602 output_operand_lossage
15603 ("invalid operand size for operand code 'z'");
15604 return;
15608 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15609 warning
15610 (0, "non-integer operand used with operand code 'z'");
15611 /* FALLTHRU */
15613 case 'Z':
15614 /* 387 opcodes don't get size suffixes when using Intel syntax. */
15615 if (ASSEMBLER_DIALECT == ASM_INTEL)
15616 return;
15618 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15620 switch (GET_MODE_SIZE (GET_MODE (x)))
15622 case 2:
15623 #ifdef HAVE_AS_IX86_FILDS
15624 putc ('s', file);
15625 #endif
15626 return;
15628 case 4:
15629 putc ('l', file);
15630 return;
15632 case 8:
15633 #ifdef HAVE_AS_IX86_FILDQ
15634 putc ('q', file);
15635 #else
15636 fputs ("ll", file);
15637 #endif
15638 return;
15640 default:
15641 break;
15644 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15646 /* 387 opcodes don't get size suffixes
15647 if the operands are registers. */
15648 if (STACK_REG_P (x))
15649 return;
15651 switch (GET_MODE_SIZE (GET_MODE (x)))
15653 case 4:
15654 putc ('s', file);
15655 return;
15657 case 8:
15658 putc ('l', file);
15659 return;
15661 case 12:
15662 case 16:
15663 putc ('t', file);
15664 return;
15666 default:
15667 break;
15670 else
15672 output_operand_lossage
15673 ("invalid operand type used with operand code 'Z'");
15674 return;
15677 output_operand_lossage
15678 ("invalid operand size for operand code 'Z'");
15679 return;
15681 case 'd':
15682 case 'b':
15683 case 'w':
15684 case 'k':
15685 case 'q':
15686 case 'h':
15687 case 't':
15688 case 'g':
15689 case 'y':
15690 case 'x':
15691 case 'X':
15692 case 'P':
15693 case 'p':
15694 break;
15696 case 's':
15697 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15699 ix86_print_operand (file, x, 0);
15700 fputs (", ", file);
15702 return;
15704 case 'Y':
15705 switch (GET_CODE (x))
15707 case NE:
15708 fputs ("neq", file);
15709 break;
15710 case EQ:
15711 fputs ("eq", file);
15712 break;
15713 case GE:
15714 case GEU:
15715 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15716 break;
15717 case GT:
15718 case GTU:
15719 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15720 break;
15721 case LE:
15722 case LEU:
15723 fputs ("le", file);
15724 break;
15725 case LT:
15726 case LTU:
15727 fputs ("lt", file);
15728 break;
15729 case UNORDERED:
15730 fputs ("unord", file);
15731 break;
15732 case ORDERED:
15733 fputs ("ord", file);
15734 break;
15735 case UNEQ:
15736 fputs ("ueq", file);
15737 break;
15738 case UNGE:
15739 fputs ("nlt", file);
15740 break;
15741 case UNGT:
15742 fputs ("nle", file);
15743 break;
15744 case UNLE:
15745 fputs ("ule", file);
15746 break;
15747 case UNLT:
15748 fputs ("ult", file);
15749 break;
15750 case LTGT:
15751 fputs ("une", file);
15752 break;
15753 default:
15754 output_operand_lossage ("operand is not a condition code, "
15755 "invalid operand code 'Y'");
15756 return;
15758 return;
15760 case 'D':
15761 /* Little bit of braindamage here. The SSE compare instructions
15762 use completely different names for the comparisons than the
15763 fp conditional moves do. */
15764 switch (GET_CODE (x))
15766 case UNEQ:
15767 if (TARGET_AVX)
15769 fputs ("eq_us", file);
15770 break;
15772 case EQ:
15773 fputs ("eq", file);
15774 break;
15775 case UNLT:
15776 if (TARGET_AVX)
15778 fputs ("nge", file);
15779 break;
15781 case LT:
15782 fputs ("lt", file);
15783 break;
15784 case UNLE:
15785 if (TARGET_AVX)
15787 fputs ("ngt", file);
15788 break;
15790 case LE:
15791 fputs ("le", file);
15792 break;
15793 case UNORDERED:
15794 fputs ("unord", file);
15795 break;
15796 case LTGT:
15797 if (TARGET_AVX)
15799 fputs ("neq_oq", file);
15800 break;
15802 case NE:
15803 fputs ("neq", file);
15804 break;
15805 case GE:
15806 if (TARGET_AVX)
15808 fputs ("ge", file);
15809 break;
15811 case UNGE:
15812 fputs ("nlt", file);
15813 break;
15814 case GT:
15815 if (TARGET_AVX)
15817 fputs ("gt", file);
15818 break;
15820 case UNGT:
15821 fputs ("nle", file);
15822 break;
15823 case ORDERED:
15824 fputs ("ord", file);
15825 break;
15826 default:
15827 output_operand_lossage ("operand is not a condition code, "
15828 "invalid operand code 'D'");
15829 return;
15831 return;
15833 case 'F':
15834 case 'f':
15835 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15836 if (ASSEMBLER_DIALECT == ASM_ATT)
15837 putc ('.', file);
15838 #endif
15840 case 'C':
15841 case 'c':
15842 if (!COMPARISON_P (x))
15844 output_operand_lossage ("operand is not a condition code, "
15845 "invalid operand code '%c'", code);
15846 return;
15848 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15849 code == 'c' || code == 'f',
15850 code == 'F' || code == 'f',
15851 file);
15852 return;
15854 case 'H':
15855 if (!offsettable_memref_p (x))
15857 output_operand_lossage ("operand is not an offsettable memory "
15858 "reference, invalid operand code 'H'");
15859 return;
15861 /* It doesn't actually matter what mode we use here, as we're
15862 only going to use this for printing. */
15863 x = adjust_address_nv (x, DImode, 8);
15864 /* Output 'qword ptr' for intel assembler dialect. */
15865 if (ASSEMBLER_DIALECT == ASM_INTEL)
15866 code = 'q';
15867 break;
15869 case 'K':
15870 gcc_assert (CONST_INT_P (x));
15872 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15873 #ifdef HAVE_AS_IX86_HLE
15874 fputs ("xacquire ", file);
15875 #else
15876 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15877 #endif
15878 else if (INTVAL (x) & IX86_HLE_RELEASE)
15879 #ifdef HAVE_AS_IX86_HLE
15880 fputs ("xrelease ", file);
15881 #else
15882 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15883 #endif
15884 /* We do not want to print the value of the operand. */
15885 return;
15887 case 'N':
15888 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15889 fputs ("{z}", file);
15890 return;
15892 case 'r':
15893 gcc_assert (CONST_INT_P (x));
15894 gcc_assert (INTVAL (x) == ROUND_SAE);
15896 if (ASSEMBLER_DIALECT == ASM_INTEL)
15897 fputs (", ", file);
15899 fputs ("{sae}", file);
15901 if (ASSEMBLER_DIALECT == ASM_ATT)
15902 fputs (", ", file);
15904 return;
15906 case 'R':
15907 gcc_assert (CONST_INT_P (x));
15909 if (ASSEMBLER_DIALECT == ASM_INTEL)
15910 fputs (", ", file);
15912 switch (INTVAL (x))
15914 case ROUND_NEAREST_INT | ROUND_SAE:
15915 fputs ("{rn-sae}", file);
15916 break;
15917 case ROUND_NEG_INF | ROUND_SAE:
15918 fputs ("{rd-sae}", file);
15919 break;
15920 case ROUND_POS_INF | ROUND_SAE:
15921 fputs ("{ru-sae}", file);
15922 break;
15923 case ROUND_ZERO | ROUND_SAE:
15924 fputs ("{rz-sae}", file);
15925 break;
15926 default:
15927 gcc_unreachable ();
15930 if (ASSEMBLER_DIALECT == ASM_ATT)
15931 fputs (", ", file);
15933 return;
15935 case '*':
15936 if (ASSEMBLER_DIALECT == ASM_ATT)
15937 putc ('*', file);
15938 return;
15940 case '&':
15942 const char *name = get_some_local_dynamic_name ();
15943 if (name == NULL)
15944 output_operand_lossage ("'%%&' used without any "
15945 "local dynamic TLS references");
15946 else
15947 assemble_name (file, name);
15948 return;
15951 case '+':
15953 rtx x;
15955 if (!optimize
15956 || optimize_function_for_size_p (cfun)
15957 || !TARGET_BRANCH_PREDICTION_HINTS)
15958 return;
15960 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15961 if (x)
15963 int pred_val = XINT (x, 0);
15965 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15966 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15968 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15969 bool cputaken
15970 = final_forward_branch_p (current_output_insn) == 0;
15972 /* Emit hints only where the default branch prediction
15973 heuristics would fail. */
15974 if (taken != cputaken)
15976 /* We use 3e (DS) prefix for taken branches and
15977 2e (CS) prefix for not taken branches. */
15978 if (taken)
15979 fputs ("ds ; ", file);
15980 else
15981 fputs ("cs ; ", file);
15985 return;
15988 case ';':
15989 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15990 putc (';', file);
15991 #endif
15992 return;
15994 case '@':
15995 if (ASSEMBLER_DIALECT == ASM_ATT)
15996 putc ('%', file);
15998 /* The kernel uses a different segment register for performance
15999 reasons; a system call would not have to trash the userspace
16000 segment register, which would be expensive. */
16001 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
16002 fputs ("fs", file);
16003 else
16004 fputs ("gs", file);
16005 return;
16007 case '~':
16008 putc (TARGET_AVX2 ? 'i' : 'f', file);
16009 return;
16011 case '^':
16012 if (TARGET_64BIT && Pmode != word_mode)
16013 fputs ("addr32 ", file);
16014 return;
16016 case '!':
16017 if (ix86_bnd_prefixed_insn_p (current_output_insn))
16018 fputs ("bnd ", file);
16019 return;
16021 default:
16022 output_operand_lossage ("invalid operand code '%c'", code);
16026 if (REG_P (x))
16027 print_reg (x, code, file);
16029 else if (MEM_P (x))
16031 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
16032 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
16033 && GET_MODE (x) != BLKmode)
16035 const char * size;
16036 switch (GET_MODE_SIZE (GET_MODE (x)))
16038 case 1: size = "BYTE"; break;
16039 case 2: size = "WORD"; break;
16040 case 4: size = "DWORD"; break;
16041 case 8: size = "QWORD"; break;
16042 case 12: size = "TBYTE"; break;
16043 case 16:
16044 if (GET_MODE (x) == XFmode)
16045 size = "TBYTE";
16046 else
16047 size = "XMMWORD";
16048 break;
16049 case 32: size = "YMMWORD"; break;
16050 case 64: size = "ZMMWORD"; break;
16051 default:
16052 gcc_unreachable ();
16055 /* Check for explicit size override (codes 'b', 'w', 'k',
16056 'q' and 'x') */
16057 if (code == 'b')
16058 size = "BYTE";
16059 else if (code == 'w')
16060 size = "WORD";
16061 else if (code == 'k')
16062 size = "DWORD";
16063 else if (code == 'q')
16064 size = "QWORD";
16065 else if (code == 'x')
16066 size = "XMMWORD";
16068 fputs (size, file);
16069 fputs (" PTR ", file);
16072 x = XEXP (x, 0);
16073 /* Avoid (%rip) for call operands. */
16074 if (CONSTANT_ADDRESS_P (x) && code == 'P'
16075 && !CONST_INT_P (x))
16076 output_addr_const (file, x);
16077 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
16078 output_operand_lossage ("invalid constraints for operand");
16079 else
16080 output_address (x);
16083 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
16085 REAL_VALUE_TYPE r;
16086 long l;
16088 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16089 REAL_VALUE_TO_TARGET_SINGLE (r, l);
16091 if (ASSEMBLER_DIALECT == ASM_ATT)
16092 putc ('$', file);
16093 /* Sign extend 32bit SFmode immediate to 8 bytes. */
16094 if (code == 'q')
16095 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
16096 (unsigned long long) (int) l);
16097 else
16098 fprintf (file, "0x%08x", (unsigned int) l);
16101 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
16103 REAL_VALUE_TYPE r;
16104 long l[2];
16106 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16107 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16109 if (ASSEMBLER_DIALECT == ASM_ATT)
16110 putc ('$', file);
16111 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16114 /* These float cases don't actually occur as immediate operands. */
16115 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
16117 char dstr[30];
16119 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16120 fputs (dstr, file);
16123 else
16125 /* We have patterns that allow zero sets of memory, for instance.
16126 In 64-bit mode, we should probably support all 8-byte vectors,
16127 since we can in fact encode that into an immediate. */
16128 if (GET_CODE (x) == CONST_VECTOR)
16130 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16131 x = const0_rtx;
16134 if (code != 'P' && code != 'p')
16136 if (CONST_INT_P (x))
16138 if (ASSEMBLER_DIALECT == ASM_ATT)
16139 putc ('$', file);
16141 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16142 || GET_CODE (x) == LABEL_REF)
16144 if (ASSEMBLER_DIALECT == ASM_ATT)
16145 putc ('$', file);
16146 else
16147 fputs ("OFFSET FLAT:", file);
16150 if (CONST_INT_P (x))
16151 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16152 else if (flag_pic || MACHOPIC_INDIRECT)
16153 output_pic_addr_const (file, x, code);
16154 else
16155 output_addr_const (file, x);
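/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true if CODE is one
   of the punctuation characters handled by ix86_print_operand above.  */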
16159 static bool
16160 ix86_print_operand_punct_valid_p (unsigned char code)
16162 return (code == '@' || code == '*' || code == '+' || code == '&'
16163 || code == ';' || code == '~' || code == '^' || code == '!');
16166 /* Print a memory operand whose address is ADDR. */
16168 static void
16169 ix86_print_operand_address (FILE *file, rtx addr)
16171 struct ix86_address parts;
16172 rtx base, index, disp;
16173 int scale;
16174 int ok;
16175 bool vsib = false;
16176 int code = 0;
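  /* Addresses may arrive wrapped in an UNSPEC: VSIB addresses for
     gather/scatter carry the vector index and scale as separate operands,
     UNSPEC_LEA_ADDR requests 64-bit register names, and the MPX
     UNSPEC_BNDMK_ADDR/UNSPEC_BNDLDX_ADDR forms carry an additional
     operand.  Unwrap these before decomposing the address.  */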
16178 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16180 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16181 gcc_assert (parts.index == NULL_RTX);
16182 parts.index = XVECEXP (addr, 0, 1);
16183 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16184 addr = XVECEXP (addr, 0, 0);
16185 vsib = true;
16187 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16189 gcc_assert (TARGET_64BIT);
16190 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16191 code = 'q';
16193 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16195 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16196 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16197 if (parts.base != NULL_RTX)
16199 parts.index = parts.base;
16200 parts.scale = 1;
16202 parts.base = XVECEXP (addr, 0, 0);
16203 addr = XVECEXP (addr, 0, 0);
16205 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16207 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16208 gcc_assert (parts.index == NULL_RTX);
16209 parts.index = XVECEXP (addr, 0, 1);
16210 addr = XVECEXP (addr, 0, 0);
16212 else
16213 ok = ix86_decompose_address (addr, &parts);
16215 gcc_assert (ok);
16217 base = parts.base;
16218 index = parts.index;
16219 disp = parts.disp;
16220 scale = parts.scale;
16222 switch (parts.seg)
16224 case SEG_DEFAULT:
16225 break;
16226 case SEG_FS:
16227 case SEG_GS:
16228 if (ASSEMBLER_DIALECT == ASM_ATT)
16229 putc ('%', file);
16230 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16231 break;
16232 default:
16233 gcc_unreachable ();
16236 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16237 if (TARGET_64BIT && !base && !index)
16239 rtx symbol = disp;
16241 if (GET_CODE (disp) == CONST
16242 && GET_CODE (XEXP (disp, 0)) == PLUS
16243 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16244 symbol = XEXP (XEXP (disp, 0), 0);
16246 if (GET_CODE (symbol) == LABEL_REF
16247 || (GET_CODE (symbol) == SYMBOL_REF
16248 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16249 base = pc_rtx;
16251 if (!base && !index)
16253 /* Displacement only requires special attention. */
16255 if (CONST_INT_P (disp))
16257 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16258 fputs ("ds:", file);
16259 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16261 else if (flag_pic)
16262 output_pic_addr_const (file, disp, 0);
16263 else
16264 output_addr_const (file, disp);
16266 else
16268 /* Print SImode register names to force addr32 prefix. */
16269 if (SImode_address_operand (addr, VOIDmode))
16271 #ifdef ENABLE_CHECKING
16272 gcc_assert (TARGET_64BIT);
16273 switch (GET_CODE (addr))
16275 case SUBREG:
16276 gcc_assert (GET_MODE (addr) == SImode);
16277 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16278 break;
16279 case ZERO_EXTEND:
16280 case AND:
16281 gcc_assert (GET_MODE (addr) == DImode);
16282 break;
16283 default:
16284 gcc_unreachable ();
16286 #endif
16287 gcc_assert (!code);
16288 code = 'k';
16290 else if (code == 0
16291 && TARGET_X32
16292 && disp
16293 && CONST_INT_P (disp)
16294 && INTVAL (disp) < -16*1024*1024)
16296 /* X32 runs in 64-bit mode, where displacement, DISP, in
16297 address DISP(%r64), is encoded as 32-bit immediate sign-
16298 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16299 address is %r64 + 0xffffffffbffffd00. When %r64 <
16300 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16301 which is invalid for x32. The correct address is %r64
16302 - 0x40000300 == 0xf7ffdd64. To properly encode
16303 -0x40000300(%r64) for x32, we zero-extend negative
16304 displacement by forcing addr32 prefix which truncates
16305 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16306 zero-extend all negative displacements, including -1(%rsp).
16307 However, for small negative displacements, sign-extension
16308 won't cause overflow. We only zero-extend negative
16309 displacements if they are < -16*1024*1024, which is also used
16310 to check legitimate address displacements for PIC. */
16311 code = 'k';
16314 if (ASSEMBLER_DIALECT == ASM_ATT)
16316 if (disp)
16318 if (flag_pic)
16319 output_pic_addr_const (file, disp, 0);
16320 else if (GET_CODE (disp) == LABEL_REF)
16321 output_asm_label (disp);
16322 else
16323 output_addr_const (file, disp);
16326 putc ('(', file);
16327 if (base)
16328 print_reg (base, code, file);
16329 if (index)
16331 putc (',', file);
16332 print_reg (index, vsib ? 0 : code, file);
16333 if (scale != 1 || vsib)
16334 fprintf (file, ",%d", scale);
16336 putc (')', file);
16338 else
16340 rtx offset = NULL_RTX;
16342 if (disp)
16344 /* Pull out the offset of a symbol; print any symbol itself. */
16345 if (GET_CODE (disp) == CONST
16346 && GET_CODE (XEXP (disp, 0)) == PLUS
16347 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16349 offset = XEXP (XEXP (disp, 0), 1);
16350 disp = gen_rtx_CONST (VOIDmode,
16351 XEXP (XEXP (disp, 0), 0));
16354 if (flag_pic)
16355 output_pic_addr_const (file, disp, 0);
16356 else if (GET_CODE (disp) == LABEL_REF)
16357 output_asm_label (disp);
16358 else if (CONST_INT_P (disp))
16359 offset = disp;
16360 else
16361 output_addr_const (file, disp);
16364 putc ('[', file);
16365 if (base)
16367 print_reg (base, code, file);
16368 if (offset)
16370 if (INTVAL (offset) >= 0)
16371 putc ('+', file);
16372 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16375 else if (offset)
16376 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16377 else
16378 putc ('0', file);
16380 if (index)
16382 putc ('+', file);
16383 print_reg (index, vsib ? 0 : code, file);
16384 if (scale != 1 || vsib)
16385 fprintf (file, "*%d", scale);
16387 putc (']', file);
16392 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16394 static bool
16395 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16397 rtx op;
16399 if (GET_CODE (x) != UNSPEC)
16400 return false;
16402 op = XVECEXP (x, 0, 0);
16403 switch (XINT (x, 1))
16405 case UNSPEC_GOTTPOFF:
16406 output_addr_const (file, op);
16407 /* FIXME: This might be @TPOFF in Sun ld. */
16408 fputs ("@gottpoff", file);
16409 break;
16410 case UNSPEC_TPOFF:
16411 output_addr_const (file, op);
16412 fputs ("@tpoff", file);
16413 break;
16414 case UNSPEC_NTPOFF:
16415 output_addr_const (file, op);
16416 if (TARGET_64BIT)
16417 fputs ("@tpoff", file);
16418 else
16419 fputs ("@ntpoff", file);
16420 break;
16421 case UNSPEC_DTPOFF:
16422 output_addr_const (file, op);
16423 fputs ("@dtpoff", file);
16424 break;
16425 case UNSPEC_GOTNTPOFF:
16426 output_addr_const (file, op);
16427 if (TARGET_64BIT)
16428 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16429 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16430 else
16431 fputs ("@gotntpoff", file);
16432 break;
16433 case UNSPEC_INDNTPOFF:
16434 output_addr_const (file, op);
16435 fputs ("@indntpoff", file);
16436 break;
16437 #if TARGET_MACHO
16438 case UNSPEC_MACHOPIC_OFFSET:
16439 output_addr_const (file, op);
16440 putc ('-', file);
16441 machopic_output_function_base_name (file);
16442 break;
16443 #endif
16445 case UNSPEC_STACK_CHECK:
16447 int offset;
16449 gcc_assert (flag_split_stack);
16451 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16452 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16453 #else
16454 gcc_unreachable ();
16455 #endif
16457 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16459 break;
16461 default:
16462 return false;
16465 return true;
16468 /* Split one or more double-mode RTL references into pairs of half-mode
16469 references. The RTL can be REG, offsettable MEM, integer constant, or
16470 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16471 split and "num" is its length. lo_half and hi_half are output arrays
16472 that parallel "operands". */
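/* For illustration (a hypothetical caller, not taken from this file), a
   single DImode operand can be split into its SImode halves like so:

     rtx lo[1], hi[1];
     split_double_mode (DImode, &operands[1], 1, lo, hi);
     // lo[0] is the low SImode word, hi[0] the word at byte offset 4.  */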
16474 void
16475 split_double_mode (machine_mode mode, rtx operands[],
16476 int num, rtx lo_half[], rtx hi_half[])
16478 machine_mode half_mode;
16479 unsigned int byte;
16481 switch (mode)
16483 case TImode:
16484 half_mode = DImode;
16485 break;
16486 case DImode:
16487 half_mode = SImode;
16488 break;
16489 default:
16490 gcc_unreachable ();
16493 byte = GET_MODE_SIZE (half_mode);
16495 while (num--)
16497 rtx op = operands[num];
16499 /* simplify_subreg refuses to split volatile memory addresses,
16500 but we still have to handle them. */
16501 if (MEM_P (op))
16503 lo_half[num] = adjust_address (op, half_mode, 0);
16504 hi_half[num] = adjust_address (op, half_mode, byte);
16506 else
16508 lo_half[num] = simplify_gen_subreg (half_mode, op,
16509 GET_MODE (op) == VOIDmode
16510 ? mode : GET_MODE (op), 0);
16511 hi_half[num] = simplify_gen_subreg (half_mode, op,
16512 GET_MODE (op) == VOIDmode
16513 ? mode : GET_MODE (op), byte);
16518 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16519 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16520 is the expression of the binary operation. The output may either be
16521 emitted here, or returned to the caller, like all output_* functions.
16523 There is no guarantee that the operands are the same mode, as they
16524 might be within FLOAT or FLOAT_EXTEND expressions. */
16526 #ifndef SYSV386_COMPAT
16527 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16528 wants to fix the assemblers because that causes incompatibility
16529 with gcc. No-one wants to fix gcc because that causes
16530 incompatibility with assemblers... You can use the option of
16531 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16532 #define SYSV386_COMPAT 1
16533 #endif
16535 const char *
16536 output_387_binary_op (rtx insn, rtx *operands)
16538 static char buf[40];
16539 const char *p;
16540 const char *ssep;
16541 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16543 #ifdef ENABLE_CHECKING
16544 /* Even if we do not want to check the inputs, this documents input
16545 constraints, which helps in understanding the following code. */
16546 if (STACK_REG_P (operands[0])
16547 && ((REG_P (operands[1])
16548 && REGNO (operands[0]) == REGNO (operands[1])
16549 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16550 || (REG_P (operands[2])
16551 && REGNO (operands[0]) == REGNO (operands[2])
16552 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16553 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16554 ; /* ok */
16555 else
16556 gcc_assert (is_sse);
16557 #endif
16559 switch (GET_CODE (operands[3]))
16561 case PLUS:
16562 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16563 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16564 p = "fiadd";
16565 else
16566 p = "fadd";
16567 ssep = "vadd";
16568 break;
16570 case MINUS:
16571 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16572 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16573 p = "fisub";
16574 else
16575 p = "fsub";
16576 ssep = "vsub";
16577 break;
16579 case MULT:
16580 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16581 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16582 p = "fimul";
16583 else
16584 p = "fmul";
16585 ssep = "vmul";
16586 break;
16588 case DIV:
16589 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16590 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16591 p = "fidiv";
16592 else
16593 p = "fdiv";
16594 ssep = "vdiv";
16595 break;
16597 default:
16598 gcc_unreachable ();
16601 if (is_sse)
16603 if (TARGET_AVX)
16605 strcpy (buf, ssep);
16606 if (GET_MODE (operands[0]) == SFmode)
16607 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16608 else
16609 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16611 else
16613 strcpy (buf, ssep + 1);
16614 if (GET_MODE (operands[0]) == SFmode)
16615 strcat (buf, "ss\t{%2, %0|%0, %2}");
16616 else
16617 strcat (buf, "sd\t{%2, %0|%0, %2}");
16619 return buf;
16621 strcpy (buf, p);
16623 switch (GET_CODE (operands[3]))
16625 case MULT:
16626 case PLUS:
16627 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16628 std::swap (operands[1], operands[2]);
16630 /* We now know operands[0] == operands[1]. */
16632 if (MEM_P (operands[2]))
16634 p = "%Z2\t%2";
16635 break;
16638 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16640 if (STACK_TOP_P (operands[0]))
16641 /* How is it that we are storing to a dead operand[2]?
16642 Well, presumably operands[1] is dead too. We can't
16643 store the result to st(0) as st(0) gets popped on this
16644 instruction. Instead store to operands[2] (which I
16645 think has to be st(1)). st(1) will be popped later.
16646 gcc <= 2.8.1 didn't have this check and generated
16647 assembly code that the Unixware assembler rejected. */
16648 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16649 else
16650 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16651 break;
16654 if (STACK_TOP_P (operands[0]))
16655 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16656 else
16657 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16658 break;
16660 case MINUS:
16661 case DIV:
16662 if (MEM_P (operands[1]))
16664 p = "r%Z1\t%1";
16665 break;
16668 if (MEM_P (operands[2]))
16670 p = "%Z2\t%2";
16671 break;
16674 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16676 #if SYSV386_COMPAT
16677 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16678 derived assemblers, confusingly reverse the direction of
16679 the operation for fsub{r} and fdiv{r} when the
16680 destination register is not st(0). The Intel assembler
16681 doesn't have this brain damage. Read !SYSV386_COMPAT to
16682 figure out what the hardware really does. */
16683 if (STACK_TOP_P (operands[0]))
16684 p = "{p\t%0, %2|rp\t%2, %0}";
16685 else
16686 p = "{rp\t%2, %0|p\t%0, %2}";
16687 #else
16688 if (STACK_TOP_P (operands[0]))
16689 /* As above for fmul/fadd, we can't store to st(0). */
16690 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16691 else
16692 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16693 #endif
16694 break;
16697 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16699 #if SYSV386_COMPAT
16700 if (STACK_TOP_P (operands[0]))
16701 p = "{rp\t%0, %1|p\t%1, %0}";
16702 else
16703 p = "{p\t%1, %0|rp\t%0, %1}";
16704 #else
16705 if (STACK_TOP_P (operands[0]))
16706 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16707 else
16708 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16709 #endif
16710 break;
16713 if (STACK_TOP_P (operands[0]))
16715 if (STACK_TOP_P (operands[1]))
16716 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16717 else
16718 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16719 break;
16721 else if (STACK_TOP_P (operands[1]))
16723 #if SYSV386_COMPAT
16724 p = "{\t%1, %0|r\t%0, %1}";
16725 #else
16726 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16727 #endif
16729 else
16731 #if SYSV386_COMPAT
16732 p = "{r\t%2, %0|\t%0, %2}";
16733 #else
16734 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16735 #endif
16737 break;
16739 default:
16740 gcc_unreachable ();
16743 strcat (buf, p);
16744 return buf;
16747 /* Check if a 256bit AVX register is referenced inside of EXP. */
16749 static bool
16750 ix86_check_avx256_register (const_rtx exp)
16752 if (GET_CODE (exp) == SUBREG)
16753 exp = SUBREG_REG (exp);
16755 return (REG_P (exp)
16756 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16759 /* Return needed mode for entity in optimize_mode_switching pass. */
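/* Roughly, the AVX_U128 entity tracks the upper halves of the 256-bit
   (ymm) registers for automatic vzeroupper insertion: AVX_U128_CLEAN means
   the upper 128 bits are known to be zeroed (e.g. after vzeroupper or
   vzeroall), AVX_U128_DIRTY means a 256-bit register may hold live upper
   bits, and AVX_U128_ANY places no requirement.  (Informal summary.)  */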
16761 static int
16762 ix86_avx_u128_mode_needed (rtx_insn *insn)
16764 if (CALL_P (insn))
16766 rtx link;
16768 /* Needed mode is set to AVX_U128_CLEAN if there are
16769 no 256bit modes used in function arguments. */
16770 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16771 link;
16772 link = XEXP (link, 1))
16774 if (GET_CODE (XEXP (link, 0)) == USE)
16776 rtx arg = XEXP (XEXP (link, 0), 0);
16778 if (ix86_check_avx256_register (arg))
16779 return AVX_U128_DIRTY;
16783 return AVX_U128_CLEAN;
16786 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16787 changes state only when a 256bit register is written to, but we need
16788 to prevent the compiler from moving the optimal insertion point above
16789 an eventual read from a 256bit register. */
16790 subrtx_iterator::array_type array;
16791 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16792 if (ix86_check_avx256_register (*iter))
16793 return AVX_U128_DIRTY;
16795 return AVX_U128_ANY;
16798 /* Return mode that i387 must be switched into
16799 prior to the execution of insn. */
16801 static int
16802 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16804 enum attr_i387_cw mode;
16806 /* The mode UNINITIALIZED is used to store the control word after a
16807 function call or ASM pattern. The mode ANY specifies that the function
16808 has no requirements on the control word and makes no changes in the
16809 bits we are interested in. */
16811 if (CALL_P (insn)
16812 || (NONJUMP_INSN_P (insn)
16813 && (asm_noperands (PATTERN (insn)) >= 0
16814 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16815 return I387_CW_UNINITIALIZED;
16817 if (recog_memoized (insn) < 0)
16818 return I387_CW_ANY;
16820 mode = get_attr_i387_cw (insn);
16822 switch (entity)
16824 case I387_TRUNC:
16825 if (mode == I387_CW_TRUNC)
16826 return mode;
16827 break;
16829 case I387_FLOOR:
16830 if (mode == I387_CW_FLOOR)
16831 return mode;
16832 break;
16834 case I387_CEIL:
16835 if (mode == I387_CW_CEIL)
16836 return mode;
16837 break;
16839 case I387_MASK_PM:
16840 if (mode == I387_CW_MASK_PM)
16841 return mode;
16842 break;
16844 default:
16845 gcc_unreachable ();
16848 return I387_CW_ANY;
16851 /* Return mode that entity must be switched into
16852 prior to the execution of insn. */
16854 static int
16855 ix86_mode_needed (int entity, rtx_insn *insn)
16857 switch (entity)
16859 case AVX_U128:
16860 return ix86_avx_u128_mode_needed (insn);
16861 case I387_TRUNC:
16862 case I387_FLOOR:
16863 case I387_CEIL:
16864 case I387_MASK_PM:
16865 return ix86_i387_mode_needed (entity, insn);
16866 default:
16867 gcc_unreachable ();
16869 return 0;
16872 /* Check if a 256bit AVX register is referenced in stores. */
16874 static void
16875 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16877 if (ix86_check_avx256_register (dest))
16879 bool *used = (bool *) data;
16880 *used = true;
16884 /* Calculate mode of upper 128bit AVX registers after the insn. */
16886 static int
16887 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16889 rtx pat = PATTERN (insn);
16891 if (vzeroupper_operation (pat, VOIDmode)
16892 || vzeroall_operation (pat, VOIDmode))
16893 return AVX_U128_CLEAN;
16895 /* We know that the state is clean after a CALL insn if there are no
16896 256bit registers used in the function return register. */
16897 if (CALL_P (insn))
16899 bool avx_reg256_found = false;
16900 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16902 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16905 /* Otherwise, return current mode. Remember that if insn
16906 references AVX 256bit registers, the mode was already changed
16907 to DIRTY from MODE_NEEDED. */
16908 return mode;
16911 /* Return the mode that an insn results in. */
16913 static int
16914 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16916 switch (entity)
16918 case AVX_U128:
16919 return ix86_avx_u128_mode_after (mode, insn);
16920 case I387_TRUNC:
16921 case I387_FLOOR:
16922 case I387_CEIL:
16923 case I387_MASK_PM:
16924 return mode;
16925 default:
16926 gcc_unreachable ();
16930 static int
16931 ix86_avx_u128_mode_entry (void)
16933 tree arg;
16935 /* Entry mode is set to AVX_U128_DIRTY if there are
16936 256bit modes used in function arguments. */
16937 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16938 arg = TREE_CHAIN (arg))
16940 rtx incoming = DECL_INCOMING_RTL (arg);
16942 if (incoming && ix86_check_avx256_register (incoming))
16943 return AVX_U128_DIRTY;
16946 return AVX_U128_CLEAN;
16949 /* Return a mode that ENTITY is assumed to be
16950 switched to at function entry. */
16952 static int
16953 ix86_mode_entry (int entity)
16955 switch (entity)
16957 case AVX_U128:
16958 return ix86_avx_u128_mode_entry ();
16959 case I387_TRUNC:
16960 case I387_FLOOR:
16961 case I387_CEIL:
16962 case I387_MASK_PM:
16963 return I387_CW_ANY;
16964 default:
16965 gcc_unreachable ();
16969 static int
16970 ix86_avx_u128_mode_exit (void)
16972 rtx reg = crtl->return_rtx;
16974 /* Exit mode is set to AVX_U128_DIRTY if there are
16975 256bit modes used in the function return register. */
16976 if (reg && ix86_check_avx256_register (reg))
16977 return AVX_U128_DIRTY;
16979 return AVX_U128_CLEAN;
16982 /* Return a mode that ENTITY is assumed to be
16983 switched to at function exit. */
16985 static int
16986 ix86_mode_exit (int entity)
16988 switch (entity)
16990 case AVX_U128:
16991 return ix86_avx_u128_mode_exit ();
16992 case I387_TRUNC:
16993 case I387_FLOOR:
16994 case I387_CEIL:
16995 case I387_MASK_PM:
16996 return I387_CW_ANY;
16997 default:
16998 gcc_unreachable ();
17002 static int
17003 ix86_mode_priority (int, int n)
17005 return n;
17008 /* Output code to initialize control word copies used by trunc?f?i and
17009 rounding patterns. CURRENT_MODE is set to the current control word,
17010 while NEW_MODE is set to the new control word. */
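/* Informal summary of the x87 control-word bits manipulated below: bits
   11:10 form the rounding-control field (00 = to nearest, 01 = down toward
   -inf, 10 = up toward +inf, 11 = toward zero/truncate), hence the 0x0c00,
   0x0400 and 0x0800 constants; bit 5 (0x0020) is the precision-exception
   mask used for nearbyint.  */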
17012 static void
17013 emit_i387_cw_initialization (int mode)
17015 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
17016 rtx new_mode;
17018 enum ix86_stack_slot slot;
17020 rtx reg = gen_reg_rtx (HImode);
17022 emit_insn (gen_x86_fnstcw_1 (stored_mode));
17023 emit_move_insn (reg, copy_rtx (stored_mode));
17025 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
17026 || optimize_insn_for_size_p ())
17028 switch (mode)
17030 case I387_CW_TRUNC:
17031 /* round toward zero (truncate) */
17032 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
17033 slot = SLOT_CW_TRUNC;
17034 break;
17036 case I387_CW_FLOOR:
17037 /* round down toward -oo */
17038 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17039 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
17040 slot = SLOT_CW_FLOOR;
17041 break;
17043 case I387_CW_CEIL:
17044 /* round up toward +oo */
17045 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17046 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
17047 slot = SLOT_CW_CEIL;
17048 break;
17050 case I387_CW_MASK_PM:
17051 /* mask precision exception for nearbyint() */
17052 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17053 slot = SLOT_CW_MASK_PM;
17054 break;
17056 default:
17057 gcc_unreachable ();
17060 else
17062 switch (mode)
17064 case I387_CW_TRUNC:
17065 /* round toward zero (truncate) */
17066 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
17067 slot = SLOT_CW_TRUNC;
17068 break;
17070 case I387_CW_FLOOR:
17071 /* round down toward -oo */
17072 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
17073 slot = SLOT_CW_FLOOR;
17074 break;
17076 case I387_CW_CEIL:
17077 /* round up toward +oo */
17078 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
17079 slot = SLOT_CW_CEIL;
17080 break;
17082 case I387_CW_MASK_PM:
17083 /* mask precision exception for nearbyint() */
17084 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17085 slot = SLOT_CW_MASK_PM;
17086 break;
17088 default:
17089 gcc_unreachable ();
17093 gcc_assert (slot < MAX_386_STACK_LOCALS);
17095 new_mode = assign_386_stack_local (HImode, slot);
17096 emit_move_insn (new_mode, reg);
17099 /* Emit vzeroupper. */
17101 void
17102 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17104 int i;
17106 /* Cancel automatic vzeroupper insertion if there are
17107 live call-saved SSE registers at the insertion point. */
17109 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17110 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17111 return;
17113 if (TARGET_64BIT)
17114 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17115 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17116 return;
17118 emit_insn (gen_avx_vzeroupper ());
17123 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
17124 is the set of hard registers live at the point where the insn(s)
17125 are to be inserted. */
17127 static void
17128 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17129 HARD_REG_SET regs_live)
17131 switch (entity)
17133 case AVX_U128:
17134 if (mode == AVX_U128_CLEAN)
17135 ix86_avx_emit_vzeroupper (regs_live);
17136 break;
17137 case I387_TRUNC:
17138 case I387_FLOOR:
17139 case I387_CEIL:
17140 case I387_MASK_PM:
17141 if (mode != I387_CW_ANY
17142 && mode != I387_CW_UNINITIALIZED)
17143 emit_i387_cw_initialization (mode);
17144 break;
17145 default:
17146 gcc_unreachable ();
17150 /* Output code for INSN to convert a float to a signed int. OPERANDS
17151 are the insn operands. The output may be [HSD]Imode and the input
17152 operand may be [SDX]Fmode. */
17154 const char *
17155 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17157 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17158 int dimode_p = GET_MODE (operands[0]) == DImode;
17159 int round_mode = get_attr_i387_cw (insn);
17161 /* Jump through a hoop or two for DImode, since the hardware has no
17162 non-popping instruction. We used to do this a different way, but
17163 that was somewhat fragile and broke with post-reload splitters. */
17164 if ((dimode_p || fisttp) && !stack_top_dies)
17165 output_asm_insn ("fld\t%y1", operands);
17167 gcc_assert (STACK_TOP_P (operands[1]));
17168 gcc_assert (MEM_P (operands[0]));
17169 gcc_assert (GET_MODE (operands[1]) != TFmode);
17171 if (fisttp)
17172 output_asm_insn ("fisttp%Z0\t%0", operands);
17173 else
17175 if (round_mode != I387_CW_ANY)
17176 output_asm_insn ("fldcw\t%3", operands);
17177 if (stack_top_dies || dimode_p)
17178 output_asm_insn ("fistp%Z0\t%0", operands);
17179 else
17180 output_asm_insn ("fist%Z0\t%0", operands);
17181 if (round_mode != I387_CW_ANY)
17182 output_asm_insn ("fldcw\t%2", operands);
17185 return "";
17188 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17189 have the values zero or one, indicates the ffreep insn's operand
17190 from the OPERANDS array. */
17192 static const char *
17193 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17195 if (TARGET_USE_FFREEP)
17196 #ifdef HAVE_AS_IX86_FFREEP
17197 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17198 #else
17200 static char retval[32];
17201 int regno = REGNO (operands[opno]);
17203 gcc_assert (STACK_REGNO_P (regno));
17205 regno -= FIRST_STACK_REG;
17207 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17208 return retval;
17210 #endif
17212 return opno ? "fstp\t%y1" : "fstp\t%y0";
17216 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17217 should be used. UNORDERED_P is true when fucom should be used. */
17219 const char *
17220 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17222 int stack_top_dies;
17223 rtx cmp_op0, cmp_op1;
17224 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17226 if (eflags_p)
17228 cmp_op0 = operands[0];
17229 cmp_op1 = operands[1];
17231 else
17233 cmp_op0 = operands[1];
17234 cmp_op1 = operands[2];
17237 if (is_sse)
17239 if (GET_MODE (operands[0]) == SFmode)
17240 if (unordered_p)
17241 return "%vucomiss\t{%1, %0|%0, %1}";
17242 else
17243 return "%vcomiss\t{%1, %0|%0, %1}";
17244 else
17245 if (unordered_p)
17246 return "%vucomisd\t{%1, %0|%0, %1}";
17247 else
17248 return "%vcomisd\t{%1, %0|%0, %1}";
17251 gcc_assert (STACK_TOP_P (cmp_op0));
17253 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17255 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17257 if (stack_top_dies)
17259 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17260 return output_387_ffreep (operands, 1);
17262 else
17263 return "ftst\n\tfnstsw\t%0";
17266 if (STACK_REG_P (cmp_op1)
17267 && stack_top_dies
17268 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17269 && REGNO (cmp_op1) != FIRST_STACK_REG)
17271 /* If the top of the 387 stack dies, and the other operand
17272 is also a stack register that dies, then this must be a
17273 `fcompp' float compare. */
17275 if (eflags_p)
17277 /* There is no double popping fcomi variant. Fortunately,
17278 eflags is immune from the fstp's cc clobbering. */
17279 if (unordered_p)
17280 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17281 else
17282 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17283 return output_387_ffreep (operands, 0);
17285 else
17287 if (unordered_p)
17288 return "fucompp\n\tfnstsw\t%0";
17289 else
17290 return "fcompp\n\tfnstsw\t%0";
17293 else
17295 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
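/* For example (worked through by hand): a register-register unordered
   compare producing eflags whose top-of-stack operand dies encodes as
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting "fucomip".  */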
17297 static const char * const alt[16] =
17299 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17300 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17301 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17302 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17304 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17305 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17306 NULL,
17307 NULL,
17309 "fcomi\t{%y1, %0|%0, %y1}",
17310 "fcomip\t{%y1, %0|%0, %y1}",
17311 "fucomi\t{%y1, %0|%0, %y1}",
17312 "fucomip\t{%y1, %0|%0, %y1}",
17314 NULL,
17315 NULL,
17316 NULL,
17317 NULL
17320 int mask;
17321 const char *ret;
17323 mask = eflags_p << 3;
17324 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17325 mask |= unordered_p << 1;
17326 mask |= stack_top_dies;
17328 gcc_assert (mask < 16);
17329 ret = alt[mask];
17330 gcc_assert (ret);
17332 return ret;
17336 void
17337 ix86_output_addr_vec_elt (FILE *file, int value)
17339 const char *directive = ASM_LONG;
17341 #ifdef ASM_QUAD
17342 if (TARGET_LP64)
17343 directive = ASM_QUAD;
17344 #else
17345 gcc_assert (!TARGET_64BIT);
17346 #endif
17348 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17351 void
17352 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17354 const char *directive = ASM_LONG;
17356 #ifdef ASM_QUAD
17357 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17358 directive = ASM_QUAD;
17359 #else
17360 gcc_assert (!TARGET_64BIT);
17361 #endif
17362 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17363 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17364 fprintf (file, "%s%s%d-%s%d\n",
17365 directive, LPREFIX, value, LPREFIX, rel);
17366 else if (HAVE_AS_GOTOFF_IN_DATA)
17367 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17368 #if TARGET_MACHO
17369 else if (TARGET_MACHO)
17371 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17372 machopic_output_function_base_name (file);
17373 putc ('\n', file);
17375 #endif
17376 else
17377 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17378 GOT_SYMBOL_NAME, LPREFIX, value);
17381 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17382 for the target. */
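/* Informal note: "xor reg, reg" is the short form (2 bytes for
   "xorl %eax, %eax" vs. 5 bytes for "movl $0, %eax"), but it clobbers the
   flags, which is why the xor variant below carries a FLAGS_REG clobber.  */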
17384 void
17385 ix86_expand_clear (rtx dest)
17387 rtx tmp;
17389 /* We play register width games, which are only valid after reload. */
17390 gcc_assert (reload_completed);
17392 /* Avoid HImode and its attendant prefix byte. */
17393 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17394 dest = gen_rtx_REG (SImode, REGNO (dest));
17395 tmp = gen_rtx_SET (dest, const0_rtx);
17397 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17399 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17400 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17403 emit_insn (tmp);
17406 /* X is an unchanging MEM. If it is a constant pool reference, return
17407 the constant pool rtx, else NULL. */
17410 maybe_get_pool_constant (rtx x)
17412 x = ix86_delegitimize_address (XEXP (x, 0));
17414 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17415 return get_pool_constant (x);
17417 return NULL_RTX;
17420 void
17421 ix86_expand_move (machine_mode mode, rtx operands[])
17423 rtx op0, op1;
17424 enum tls_model model;
17426 op0 = operands[0];
17427 op1 = operands[1];
17429 if (GET_CODE (op1) == SYMBOL_REF)
17431 rtx tmp;
17433 model = SYMBOL_REF_TLS_MODEL (op1);
17434 if (model)
17436 op1 = legitimize_tls_address (op1, model, true);
17437 op1 = force_operand (op1, op0);
17438 if (op1 == op0)
17439 return;
17440 op1 = convert_to_mode (mode, op1, 1);
17442 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17443 op1 = tmp;
17445 else if (GET_CODE (op1) == CONST
17446 && GET_CODE (XEXP (op1, 0)) == PLUS
17447 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17449 rtx addend = XEXP (XEXP (op1, 0), 1);
17450 rtx symbol = XEXP (XEXP (op1, 0), 0);
17451 rtx tmp;
17453 model = SYMBOL_REF_TLS_MODEL (symbol);
17454 if (model)
17455 tmp = legitimize_tls_address (symbol, model, true);
17456 else
17457 tmp = legitimize_pe_coff_symbol (symbol, true);
17459 if (tmp)
17461 tmp = force_operand (tmp, NULL);
17462 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17463 op0, 1, OPTAB_DIRECT);
17464 if (tmp == op0)
17465 return;
17466 op1 = convert_to_mode (mode, tmp, 1);
17470 if ((flag_pic || MACHOPIC_INDIRECT)
17471 && symbolic_operand (op1, mode))
17473 if (TARGET_MACHO && !TARGET_64BIT)
17475 #if TARGET_MACHO
17476 /* dynamic-no-pic */
17477 if (MACHOPIC_INDIRECT)
17479 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17480 ? op0 : gen_reg_rtx (Pmode);
17481 op1 = machopic_indirect_data_reference (op1, temp);
17482 if (MACHOPIC_PURE)
17483 op1 = machopic_legitimize_pic_address (op1, mode,
17484 temp == op1 ? 0 : temp);
17486 if (op0 != op1 && GET_CODE (op0) != MEM)
17488 rtx insn = gen_rtx_SET (op0, op1);
17489 emit_insn (insn);
17490 return;
17492 if (GET_CODE (op0) == MEM)
17493 op1 = force_reg (Pmode, op1);
17494 else
17496 rtx temp = op0;
17497 if (GET_CODE (temp) != REG)
17498 temp = gen_reg_rtx (Pmode);
17499 temp = legitimize_pic_address (op1, temp);
17500 if (temp == op0)
17501 return;
17502 op1 = temp;
17504 /* dynamic-no-pic */
17505 #endif
17507 else
17509 if (MEM_P (op0))
17510 op1 = force_reg (mode, op1);
17511 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17513 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17514 op1 = legitimize_pic_address (op1, reg);
17515 if (op0 == op1)
17516 return;
17517 op1 = convert_to_mode (mode, op1, 1);
17521 else
17523 if (MEM_P (op0)
17524 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17525 || !push_operand (op0, mode))
17526 && MEM_P (op1))
17527 op1 = force_reg (mode, op1);
17529 if (push_operand (op0, mode)
17530 && ! general_no_elim_operand (op1, mode))
17531 op1 = copy_to_mode_reg (mode, op1);
17533 /* Force large constants in 64bit compilation into a register
17534 to get them CSEed. */
17535 if (can_create_pseudo_p ()
17536 && (mode == DImode) && TARGET_64BIT
17537 && immediate_operand (op1, mode)
17538 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17539 && !register_operand (op0, mode)
17540 && optimize)
17541 op1 = copy_to_mode_reg (mode, op1);
17543 if (can_create_pseudo_p ()
17544 && CONST_DOUBLE_P (op1))
17546 /* If we are loading a floating point constant to a register,
17547 force the value to memory now, since we'll get better code
17548 out of the back end. */
17550 op1 = validize_mem (force_const_mem (mode, op1));
17551 if (!register_operand (op0, mode))
17553 rtx temp = gen_reg_rtx (mode);
17554 emit_insn (gen_rtx_SET (temp, op1));
17555 emit_move_insn (op0, temp);
17556 return;
17561 emit_insn (gen_rtx_SET (op0, op1));
17564 void
17565 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17567 rtx op0 = operands[0], op1 = operands[1];
17568 unsigned int align = GET_MODE_ALIGNMENT (mode);
17570 if (push_operand (op0, VOIDmode))
17571 op0 = emit_move_resolve_push (mode, op0);
17573 /* Force constants other than zero into memory. We do not know how
17574 the instructions used to build constants modify the upper 64 bits
17575 of the register; once we have that information, we may be able
17576 to handle some of them more efficiently. */
17577 if (can_create_pseudo_p ()
17578 && register_operand (op0, mode)
17579 && (CONSTANT_P (op1)
17580 || (GET_CODE (op1) == SUBREG
17581 && CONSTANT_P (SUBREG_REG (op1))))
17582 && !standard_sse_constant_p (op1))
17583 op1 = validize_mem (force_const_mem (mode, op1));
17585 /* We need to check memory alignment for SSE mode since an attribute
17586 can make operands unaligned. */
17587 if (can_create_pseudo_p ()
17588 && SSE_REG_MODE_P (mode)
17589 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17590 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17592 rtx tmp[2];
17594 /* ix86_expand_vector_move_misalign() does not like constants ... */
17595 if (CONSTANT_P (op1)
17596 || (GET_CODE (op1) == SUBREG
17597 && CONSTANT_P (SUBREG_REG (op1))))
17598 op1 = validize_mem (force_const_mem (mode, op1));
17600 /* ... nor both arguments in memory. */
17601 if (!register_operand (op0, mode)
17602 && !register_operand (op1, mode))
17603 op1 = force_reg (mode, op1);
17605 tmp[0] = op0; tmp[1] = op1;
17606 ix86_expand_vector_move_misalign (mode, tmp);
17607 return;
17610 /* Make operand1 a register if it isn't already. */
17611 if (can_create_pseudo_p ()
17612 && !register_operand (op0, mode)
17613 && !register_operand (op1, mode))
17615 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17616 return;
17619 emit_insn (gen_rtx_SET (op0, op1));
17622 /* Split 32-byte AVX unaligned load and store if needed. */
17624 static void
17625 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17627 rtx m;
17628 rtx (*extract) (rtx, rtx, rtx);
17629 rtx (*load_unaligned) (rtx, rtx);
17630 rtx (*store_unaligned) (rtx, rtx);
17631 machine_mode mode;
17633 switch (GET_MODE (op0))
17635 default:
17636 gcc_unreachable ();
17637 case V32QImode:
17638 extract = gen_avx_vextractf128v32qi;
17639 load_unaligned = gen_avx_loaddquv32qi;
17640 store_unaligned = gen_avx_storedquv32qi;
17641 mode = V16QImode;
17642 break;
17643 case V8SFmode:
17644 extract = gen_avx_vextractf128v8sf;
17645 load_unaligned = gen_avx_loadups256;
17646 store_unaligned = gen_avx_storeups256;
17647 mode = V4SFmode;
17648 break;
17649 case V4DFmode:
17650 extract = gen_avx_vextractf128v4df;
17651 load_unaligned = gen_avx_loadupd256;
17652 store_unaligned = gen_avx_storeupd256;
17653 mode = V2DFmode;
17654 break;
17657 if (MEM_P (op1))
17659 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17660 && optimize_insn_for_speed_p ())
17662 rtx r = gen_reg_rtx (mode);
17663 m = adjust_address (op1, mode, 0);
17664 emit_move_insn (r, m);
17665 m = adjust_address (op1, mode, 16);
17666 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17667 emit_move_insn (op0, r);
17669 /* Normal *mov<mode>_internal pattern will handle
17670 unaligned loads just fine if misaligned_operand
17671 is true, and without the UNSPEC it can be combined
17672 with arithmetic instructions. */
17673 else if (misaligned_operand (op1, GET_MODE (op1)))
17674 emit_insn (gen_rtx_SET (op0, op1));
17675 else
17676 emit_insn (load_unaligned (op0, op1));
17678 else if (MEM_P (op0))
17680 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17681 && optimize_insn_for_speed_p ())
17683 m = adjust_address (op0, mode, 0);
17684 emit_insn (extract (m, op1, const0_rtx));
17685 m = adjust_address (op0, mode, 16);
17686 emit_insn (extract (m, op1, const1_rtx));
17688 else
17689 emit_insn (store_unaligned (op0, op1));
17691 else
17692 gcc_unreachable ();
17695 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17696 straight to ix86_expand_vector_move. */
17697 /* Code generation for scalar reg-reg moves of single and double precision data:
17698 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17699 movaps reg, reg
17700 else
17701 movss reg, reg
17702 if (x86_sse_partial_reg_dependency == true)
17703 movapd reg, reg
17704 else
17705 movsd reg, reg
17707 Code generation for scalar loads of double precision data:
17708 if (x86_sse_split_regs == true)
17709 movlpd mem, reg (gas syntax)
17710 else
17711 movsd mem, reg
17713 Code generation for unaligned packed loads of single precision data
17714 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17715 if (x86_sse_unaligned_move_optimal)
17716 movups mem, reg
17718 if (x86_sse_partial_reg_dependency == true)
17720 xorps reg, reg
17721 movlps mem, reg
17722 movhps mem+8, reg
17724 else
17726 movlps mem, reg
17727 movhps mem+8, reg
17730 Code generation for unaligned packed loads of double precision data
17731 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17732 if (x86_sse_unaligned_move_optimal)
17733 movupd mem, reg
17735 if (x86_sse_split_regs == true)
17737 movlpd mem, reg
17738 movhpd mem+8, reg
17740 else
17742 movsd mem, reg
17743 movhpd mem+8, reg
17747 void
17748 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17750 rtx op0, op1, orig_op0 = NULL_RTX, m;
17751 rtx (*load_unaligned) (rtx, rtx);
17752 rtx (*store_unaligned) (rtx, rtx);
17754 op0 = operands[0];
17755 op1 = operands[1];
17757 if (GET_MODE_SIZE (mode) == 64)
17759 switch (GET_MODE_CLASS (mode))
17761 case MODE_VECTOR_INT:
17762 case MODE_INT:
17763 if (GET_MODE (op0) != V16SImode)
17765 if (!MEM_P (op0))
17767 orig_op0 = op0;
17768 op0 = gen_reg_rtx (V16SImode);
17770 else
17771 op0 = gen_lowpart (V16SImode, op0);
17773 op1 = gen_lowpart (V16SImode, op1);
17774 /* FALLTHRU */
17776 case MODE_VECTOR_FLOAT:
17777 switch (GET_MODE (op0))
17779 default:
17780 gcc_unreachable ();
17781 case V16SImode:
17782 load_unaligned = gen_avx512f_loaddquv16si;
17783 store_unaligned = gen_avx512f_storedquv16si;
17784 break;
17785 case V16SFmode:
17786 load_unaligned = gen_avx512f_loadups512;
17787 store_unaligned = gen_avx512f_storeups512;
17788 break;
17789 case V8DFmode:
17790 load_unaligned = gen_avx512f_loadupd512;
17791 store_unaligned = gen_avx512f_storeupd512;
17792 break;
17795 if (MEM_P (op1))
17796 emit_insn (load_unaligned (op0, op1));
17797 else if (MEM_P (op0))
17798 emit_insn (store_unaligned (op0, op1));
17799 else
17800 gcc_unreachable ();
17801 if (orig_op0)
17802 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17803 break;
17805 default:
17806 gcc_unreachable ();
17809 return;
17812 if (TARGET_AVX
17813 && GET_MODE_SIZE (mode) == 32)
17815 switch (GET_MODE_CLASS (mode))
17817 case MODE_VECTOR_INT:
17818 case MODE_INT:
17819 if (GET_MODE (op0) != V32QImode)
17821 if (!MEM_P (op0))
17823 orig_op0 = op0;
17824 op0 = gen_reg_rtx (V32QImode);
17826 else
17827 op0 = gen_lowpart (V32QImode, op0);
17829 op1 = gen_lowpart (V32QImode, op1);
17830 /* FALLTHRU */
17832 case MODE_VECTOR_FLOAT:
17833 ix86_avx256_split_vector_move_misalign (op0, op1);
17834 if (orig_op0)
17835 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17836 break;
17838 default:
17839 gcc_unreachable ();
17842 return;
17845 if (MEM_P (op1))
17847 /* Normal *mov<mode>_internal pattern will handle
17848 unaligned loads just fine if misaligned_operand
17849 is true, and without the UNSPEC it can be combined
17850 with arithmetic instructions. */
17851 if (TARGET_AVX
17852 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17853 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17854 && misaligned_operand (op1, GET_MODE (op1)))
17855 emit_insn (gen_rtx_SET (op0, op1));
17856 /* ??? If we have typed data, then it would appear that using
17857 movdqu is the only way to get unaligned data loaded with
17858 integer type. */
17859 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17861 if (GET_MODE (op0) != V16QImode)
17863 orig_op0 = op0;
17864 op0 = gen_reg_rtx (V16QImode);
17866 op1 = gen_lowpart (V16QImode, op1);
17867 /* We will eventually emit movups based on insn attributes. */
17868 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17869 if (orig_op0)
17870 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17872 else if (TARGET_SSE2 && mode == V2DFmode)
17874 rtx zero;
17876 if (TARGET_AVX
17877 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17878 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17879 || optimize_insn_for_size_p ())
17881 /* We will eventually emit movups based on insn attributes. */
17882 emit_insn (gen_sse2_loadupd (op0, op1));
17883 return;
17886 /* When SSE registers are split into halves, we can avoid
17887 writing to the top half twice. */
17888 if (TARGET_SSE_SPLIT_REGS)
17890 emit_clobber (op0);
17891 zero = op0;
17893 else
17895 /* ??? Not sure about the best option for the Intel chips.
17896 The following would seem to satisfy; the register is
17897 entirely cleared, breaking the dependency chain. We
17898 then store to the upper half, with a dependency depth
17899 of one. A rumor has it that Intel recommends two movsd
17900 followed by an unpacklpd, but this is unconfirmed. And
17901 given that the dependency depth of the unpacklpd would
17902 still be one, I'm not sure why this would be better. */
17903 zero = CONST0_RTX (V2DFmode);
17906 m = adjust_address (op1, DFmode, 0);
17907 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17908 m = adjust_address (op1, DFmode, 8);
17909 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17911 else
17913 rtx t;
17915 if (TARGET_AVX
17916 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17917 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17918 || optimize_insn_for_size_p ())
17920 if (GET_MODE (op0) != V4SFmode)
17922 orig_op0 = op0;
17923 op0 = gen_reg_rtx (V4SFmode);
17925 op1 = gen_lowpart (V4SFmode, op1);
17926 emit_insn (gen_sse_loadups (op0, op1));
17927 if (orig_op0)
17928 emit_move_insn (orig_op0,
17929 gen_lowpart (GET_MODE (orig_op0), op0));
17930 return;
17933 if (mode != V4SFmode)
17934 t = gen_reg_rtx (V4SFmode);
17935 else
17936 t = op0;
17938 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17939 emit_move_insn (t, CONST0_RTX (V4SFmode));
17940 else
17941 emit_clobber (t);
17943 m = adjust_address (op1, V2SFmode, 0);
17944 emit_insn (gen_sse_loadlps (t, t, m));
17945 m = adjust_address (op1, V2SFmode, 8);
17946 emit_insn (gen_sse_loadhps (t, t, m));
17947 if (mode != V4SFmode)
17948 emit_move_insn (op0, gen_lowpart (mode, t));
17951 else if (MEM_P (op0))
17953 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17955 op0 = gen_lowpart (V16QImode, op0);
17956 op1 = gen_lowpart (V16QImode, op1);
17957 /* We will eventually emit movups based on insn attributes. */
17958 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17960 else if (TARGET_SSE2 && mode == V2DFmode)
17962 if (TARGET_AVX
17963 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17964 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17965 || optimize_insn_for_size_p ())
17966 /* We will eventually emit movups based on insn attributes. */
17967 emit_insn (gen_sse2_storeupd (op0, op1));
17968 else
17970 m = adjust_address (op0, DFmode, 0);
17971 emit_insn (gen_sse2_storelpd (m, op1));
17972 m = adjust_address (op0, DFmode, 8);
17973 emit_insn (gen_sse2_storehpd (m, op1));
17976 else
17978 if (mode != V4SFmode)
17979 op1 = gen_lowpart (V4SFmode, op1);
17981 if (TARGET_AVX
17982 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17983 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17984 || optimize_insn_for_size_p ())
17986 op0 = gen_lowpart (V4SFmode, op0);
17987 emit_insn (gen_sse_storeups (op0, op1));
17989 else
17991 m = adjust_address (op0, V2SFmode, 0);
17992 emit_insn (gen_sse_storelps (m, op1));
17993 m = adjust_address (op0, V2SFmode, 8);
17994 emit_insn (gen_sse_storehps (m, op1));
17998 else
17999 gcc_unreachable ();
18002 /* Helper function of ix86_fixup_binary_operands to canonicalize
18003 operand order. Returns true if the operands should be swapped. */
18005 static bool
18006 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
18007 rtx operands[])
18009 rtx dst = operands[0];
18010 rtx src1 = operands[1];
18011 rtx src2 = operands[2];
18013 /* If the operation is not commutative, we can't do anything. */
18014 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
18015 return false;
18017 /* Highest priority is that src1 should match dst. */
18018 if (rtx_equal_p (dst, src1))
18019 return false;
18020 if (rtx_equal_p (dst, src2))
18021 return true;
18023 /* Next highest priority is that immediate constants come second. */
18024 if (immediate_operand (src2, mode))
18025 return false;
18026 if (immediate_operand (src1, mode))
18027 return true;
18029 /* Lowest priority is that memory references should come second. */
18030 if (MEM_P (src2))
18031 return false;
18032 if (MEM_P (src1))
18033 return true;
18035 return false;
18039 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
18040 destination to use for the operation. If different from the true
18041 destination in operands[0], a copy operation will be required. */
18044 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
18045 rtx operands[])
18047 rtx dst = operands[0];
18048 rtx src1 = operands[1];
18049 rtx src2 = operands[2];
18051 /* Canonicalize operand order. */
18052 if (ix86_swap_binary_operands_p (code, mode, operands))
18054 /* It is invalid to swap operands of different modes. */
18055 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
18057 std::swap (src1, src2);
18060 /* Both source operands cannot be in memory. */
18061 if (MEM_P (src1) && MEM_P (src2))
18063 /* Optimization: Only read from memory once. */
18064 if (rtx_equal_p (src1, src2))
18066 src2 = force_reg (mode, src2);
18067 src1 = src2;
18069 else if (rtx_equal_p (dst, src1))
18070 src2 = force_reg (mode, src2);
18071 else
18072 src1 = force_reg (mode, src1);
18075 /* If the destination is memory, and we do not have matching source
18076 operands, do things in registers. */
18077 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18078 dst = gen_reg_rtx (mode);
18080 /* Source 1 cannot be a constant. */
18081 if (CONSTANT_P (src1))
18082 src1 = force_reg (mode, src1);
18084 /* Source 1 cannot be a non-matching memory. */
18085 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18086 src1 = force_reg (mode, src1);
18088 /* Improve address combine. */
18089 if (code == PLUS
18090 && GET_MODE_CLASS (mode) == MODE_INT
18091 && MEM_P (src2))
18092 src2 = force_reg (mode, src2);
18094 operands[1] = src1;
18095 operands[2] = src2;
18096 return dst;
18099 /* Similarly, but assume that the destination has already been
18100 set up properly. */
18102 void
18103 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18104 machine_mode mode, rtx operands[])
18106 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18107 gcc_assert (dst == operands[0]);
18110 /* Attempt to expand a binary operator. Make the expansion closer to the
18111 actual machine, than just general_operand, which will allow 3 separate
18112 memory references (one output, two inputs) in a single insn. */
18114 void
18115 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18116 rtx operands[])
18118 rtx src1, src2, dst, op, clob;
18120 dst = ix86_fixup_binary_operands (code, mode, operands);
18121 src1 = operands[1];
18122 src2 = operands[2];
18124 /* Emit the instruction. */
18126 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18128 if (reload_completed
18129 && code == PLUS
18130 && !rtx_equal_p (dst, src1))
18132 /* This is going to be an LEA; avoid splitting it later. */
18133 emit_insn (op);
18135 else
18137 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18138 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18141 /* Fix up the destination if needed. */
18142 if (dst != operands[0])
18143 emit_move_insn (operands[0], dst);
18146 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18147 the given OPERANDS. */
18149 void
18150 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18151 rtx operands[])
18153 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18154 if (GET_CODE (operands[1]) == SUBREG)
18156 op1 = operands[1];
18157 op2 = operands[2];
18159 else if (GET_CODE (operands[2]) == SUBREG)
18161 op1 = operands[2];
18162 op2 = operands[1];
18164 /* Optimize (__m128i) d | (__m128i) e and similar code, when d and e are
18165 float vectors, into a float vector logical insn. In C/C++, without
18166 using intrinsics, there is no other way to express a vector logical
18167 operation on float vectors than to cast them temporarily to integer
18168 vectors. */
18169 if (op1
18170 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18171 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18172 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18173 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18174 && SUBREG_BYTE (op1) == 0
18175 && (GET_CODE (op2) == CONST_VECTOR
18176 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18177 && SUBREG_BYTE (op2) == 0))
18178 && can_create_pseudo_p ())
18180 rtx dst;
18181 switch (GET_MODE (SUBREG_REG (op1)))
18183 case V4SFmode:
18184 case V8SFmode:
18185 case V16SFmode:
18186 case V2DFmode:
18187 case V4DFmode:
18188 case V8DFmode:
18189 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18190 if (GET_CODE (op2) == CONST_VECTOR)
18192 op2 = gen_lowpart (GET_MODE (dst), op2);
18193 op2 = force_reg (GET_MODE (dst), op2);
18195 else
18197 op1 = operands[1];
18198 op2 = SUBREG_REG (operands[2]);
18199 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18200 op2 = force_reg (GET_MODE (dst), op2);
18202 op1 = SUBREG_REG (op1);
18203 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18204 op1 = force_reg (GET_MODE (dst), op1);
18205 emit_insn (gen_rtx_SET (dst,
18206 gen_rtx_fmt_ee (code, GET_MODE (dst),
18207 op1, op2)));
18208 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18209 return;
18210 default:
18211 break;
18214 if (!nonimmediate_operand (operands[1], mode))
18215 operands[1] = force_reg (mode, operands[1]);
18216 if (!nonimmediate_operand (operands[2], mode))
18217 operands[2] = force_reg (mode, operands[2]);
18218 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18219 emit_insn (gen_rtx_SET (operands[0],
18220 gen_rtx_fmt_ee (code, mode, operands[1],
18221 operands[2])));
18224 /* Return TRUE or FALSE depending on whether the binary operator meets the
18225 appropriate constraints. */
18227 bool
18228 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18229 rtx operands[3])
18231 rtx dst = operands[0];
18232 rtx src1 = operands[1];
18233 rtx src2 = operands[2];
18235 /* Both source operands cannot be in memory. */
18236 if (MEM_P (src1) && MEM_P (src2))
18237 return false;
18239 /* Canonicalize operand order for commutative operators. */
18240 if (ix86_swap_binary_operands_p (code, mode, operands))
18241 std::swap (src1, src2);
18243 /* If the destination is memory, we must have a matching source operand. */
18244 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18245 return false;
18247 /* Source 1 cannot be a constant. */
18248 if (CONSTANT_P (src1))
18249 return false;
18251 /* Source 1 cannot be a non-matching memory. */
18252 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18253 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18254 return (code == AND
18255 && (mode == HImode
18256 || mode == SImode
18257 || (TARGET_64BIT && mode == DImode))
18258 && satisfies_constraint_L (src2));
18260 return true;
18263 /* Attempt to expand a unary operator. Make the expansion closer to the
18264 actual machine, than just general_operand, which will allow 2 separate
18265 memory references (one output, one input) in a single insn. */
18267 void
18268 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18269 rtx operands[])
18271 bool matching_memory = false;
18272 rtx src, dst, op, clob;
18274 dst = operands[0];
18275 src = operands[1];
18277 /* If the destination is memory, and we do not have matching source
18278 operands, do things in registers. */
18279 if (MEM_P (dst))
18281 if (rtx_equal_p (dst, src))
18282 matching_memory = true;
18283 else
18284 dst = gen_reg_rtx (mode);
18287 /* When source operand is memory, destination must match. */
18288 if (MEM_P (src) && !matching_memory)
18289 src = force_reg (mode, src);
18291 /* Emit the instruction. */
18293 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
18295 if (code == NOT)
18296 emit_insn (op);
18297 else
18299 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18300 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18303 /* Fix up the destination if needed. */
18304 if (dst != operands[0])
18305 emit_move_insn (operands[0], dst);
18308 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18309 divisor are within the range [0-255]. */
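/* Illustrative shape of the split for an unsigned SImode division
   "q = a / b; r = a % b" (a hand-written sketch, not emitted verbatim):

     if (((a | b) & ~0xff) == 0)
       {
         // 16-bit dividend / 8-bit divisor: AL = quotient, AH = remainder.
         q = (unsigned char) ((unsigned short) a / (unsigned char) b);
         r = (unsigned char) ((unsigned short) a % (unsigned char) b);
       }
     else
       {
         q = a / b;   // full-width divide
         r = a % b;
       }  */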
18311 void
18312 ix86_split_idivmod (machine_mode mode, rtx operands[],
18313 bool signed_p)
18315 rtx_code_label *end_label, *qimode_label;
18316 rtx insn, div, mod;
18317 rtx scratch, tmp0, tmp1, tmp2;
18318 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18319 rtx (*gen_zero_extend) (rtx, rtx);
18320 rtx (*gen_test_ccno_1) (rtx, rtx);
18322 switch (mode)
18324 case SImode:
18325 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18326 gen_test_ccno_1 = gen_testsi_ccno_1;
18327 gen_zero_extend = gen_zero_extendqisi2;
18328 break;
18329 case DImode:
18330 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18331 gen_test_ccno_1 = gen_testdi_ccno_1;
18332 gen_zero_extend = gen_zero_extendqidi2;
18333 break;
18334 default:
18335 gcc_unreachable ();
18338 end_label = gen_label_rtx ();
18339 qimode_label = gen_label_rtx ();
18341 scratch = gen_reg_rtx (mode);
18343 /* Use 8bit unsigned divmod if dividend and divisor are within
18344 the range [0-255]. */
18345 emit_move_insn (scratch, operands[2]);
18346 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18347 scratch, 1, OPTAB_DIRECT);
18348 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18349 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18350 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18351 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18352 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18353 pc_rtx);
18354 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
18355 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18356 JUMP_LABEL (insn) = qimode_label;
18358 /* Generate original signed/unsigned divmod. */
18359 div = gen_divmod4_1 (operands[0], operands[1],
18360 operands[2], operands[3]);
18361 emit_insn (div);
18363 /* Branch to the end. */
18364 emit_jump_insn (gen_jump (end_label));
18365 emit_barrier ();
18367 /* Generate 8bit unsigned divide. */
18368 emit_label (qimode_label);
18369 /* Don't use operands[0] for result of 8bit divide since not all
18370 registers support QImode ZERO_EXTRACT. */
18371 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18372 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18373 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18374 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18376 if (signed_p)
18378 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18379 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18381 else
18383 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18384 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18387 /* Extract remainder from AH. */
18388 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18389 if (REG_P (operands[1]))
18390 insn = emit_move_insn (operands[1], tmp1);
18391 else
18393 /* Need a new scratch register since the old one has the result
18394 of the 8bit divide. */
18395 scratch = gen_reg_rtx (mode);
18396 emit_move_insn (scratch, tmp1);
18397 insn = emit_move_insn (operands[1], scratch);
18399 set_unique_reg_note (insn, REG_EQUAL, mod);
18401 /* Zero extend quotient from AL. */
18402 tmp1 = gen_lowpart (QImode, tmp0);
18403 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18404 set_unique_reg_note (insn, REG_EQUAL, div);
18406 emit_label (end_label);
18409 #define LEA_MAX_STALL (3)
18410 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18412 /* Increase given DISTANCE in half-cycles according to
18413 dependencies between PREV and NEXT instructions.
18414 Add 1 half-cycle if there is no dependency and
18415 go to the next cycle if there is some dependency. */
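/* For example, with DISTANCE == 3 half-cycles: if NEXT does not use a
   register defined by PREV, the result is 3 + 1 = 4; if it does (or either
   insn is missing), the distance is first rounded up to a whole cycle and a
   further cycle is added: 3 + (3 & 1) + 2 = 6.  */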
18417 static unsigned int
18418 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18420 df_ref def, use;
18422 if (!prev || !next)
18423 return distance + (distance & 1) + 2;
18425 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18426 return distance + 1;
18428 FOR_EACH_INSN_USE (use, next)
18429 FOR_EACH_INSN_DEF (def, prev)
18430 if (!DF_REF_IS_ARTIFICIAL (def)
18431 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18432 return distance + (distance & 1) + 2;
18434 return distance + 1;
18437 /* Function checks if instruction INSN defines register number
18438 REGNO1 or REGNO2. */
18440 static bool
18441 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18442 rtx_insn *insn)
18444 df_ref def;
18446 FOR_EACH_INSN_DEF (def, insn)
18447 if (DF_REF_REG_DEF_P (def)
18448 && !DF_REF_IS_ARTIFICIAL (def)
18449 && (regno1 == DF_REF_REGNO (def)
18450 || regno2 == DF_REF_REGNO (def)))
18451 return true;
18453 return false;
18456 /* Function checks if instruction INSN uses register number
18457 REGNO as part of an address expression. */
18459 static bool
18460 insn_uses_reg_mem (unsigned int regno, rtx insn)
18462 df_ref use;
18464 FOR_EACH_INSN_USE (use, insn)
18465 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18466 return true;
18468 return false;
18471 /* Search backward for non-agu definition of register number REGNO1
18472 or register number REGNO2 in basic block starting from instruction
18473 START up to head of basic block or instruction INSN.
18475 Function puts a true value into *FOUND if a definition was found
18476 and false otherwise.
18478 The distance in half-cycles between START and the found instruction, or
18479 the head of the BB, is added to DISTANCE and returned. */
18481 static int
18482 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18483 rtx_insn *insn, int distance,
18484 rtx_insn *start, bool *found)
18486 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18487 rtx_insn *prev = start;
18488 rtx_insn *next = NULL;
18490 *found = false;
18492 while (prev
18493 && prev != insn
18494 && distance < LEA_SEARCH_THRESHOLD)
18496 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18498 distance = increase_distance (prev, next, distance);
18499 if (insn_defines_reg (regno1, regno2, prev))
18501 if (recog_memoized (prev) < 0
18502 || get_attr_type (prev) != TYPE_LEA)
18504 *found = true;
18505 return distance;
18509 next = prev;
18511 if (prev == BB_HEAD (bb))
18512 break;
18514 prev = PREV_INSN (prev);
18517 return distance;
18520 /* Search backward for non-agu definition of register number REGNO1
18521 or register number REGNO2 in INSN's basic block until
18522 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18523 2. Reach a neighbouring BB's boundary, or
18524 3. Reach an agu definition.
18525 Returns the distance between the non-agu definition point and INSN.
18526 If no definition point, returns -1. */
18528 static int
18529 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18530 rtx_insn *insn)
18532 basic_block bb = BLOCK_FOR_INSN (insn);
18533 int distance = 0;
18534 bool found = false;
18536 if (insn != BB_HEAD (bb))
18537 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18538 distance, PREV_INSN (insn),
18539 &found);
18541 if (!found && distance < LEA_SEARCH_THRESHOLD)
18543 edge e;
18544 edge_iterator ei;
18545 bool simple_loop = false;
18547 FOR_EACH_EDGE (e, ei, bb->preds)
18548 if (e->src == bb)
18550 simple_loop = true;
18551 break;
18554 if (simple_loop)
18555 distance = distance_non_agu_define_in_bb (regno1, regno2,
18556 insn, distance,
18557 BB_END (bb), &found);
18558 else
18560 int shortest_dist = -1;
18561 bool found_in_bb = false;
18563 FOR_EACH_EDGE (e, ei, bb->preds)
18565 int bb_dist
18566 = distance_non_agu_define_in_bb (regno1, regno2,
18567 insn, distance,
18568 BB_END (e->src),
18569 &found_in_bb);
18570 if (found_in_bb)
18572 if (shortest_dist < 0)
18573 shortest_dist = bb_dist;
18574 else if (bb_dist > 0)
18575 shortest_dist = MIN (bb_dist, shortest_dist);
18577 found = true;
18581 distance = shortest_dist;
18585 /* get_attr_type may modify recog data. We want to make sure
18586 that recog data is valid for instruction INSN, on which
18587 distance_non_agu_define is called. INSN is unchanged here. */
18588 extract_insn_cached (insn);
18590 if (!found)
18591 return -1;
18593 return distance >> 1;
18596 /* Return the distance in half-cycles between INSN and the next
18597    insn that uses register number REGNO in a memory address, added
18598    to DISTANCE.  Return -1 if REGNO is set.
18600 Put true value into *FOUND if register usage was found and
18601 false otherwise.
18602 Put true value into *REDEFINED if register redefinition was
18603 found and false otherwise. */
18605 static int
18606 distance_agu_use_in_bb (unsigned int regno,
18607 rtx_insn *insn, int distance, rtx_insn *start,
18608 bool *found, bool *redefined)
18610 basic_block bb = NULL;
18611 rtx_insn *next = start;
18612 rtx_insn *prev = NULL;
18614 *found = false;
18615 *redefined = false;
18617 if (start != NULL_RTX)
18619 bb = BLOCK_FOR_INSN (start);
18620 if (start != BB_HEAD (bb))
18621 /* If insn and start belong to the same bb, set prev to insn,
18622 so the call to increase_distance will increase the distance
18623 between insns by 1. */
18624 prev = insn;
18627 while (next
18628 && next != insn
18629 && distance < LEA_SEARCH_THRESHOLD)
18631 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18633 distance = increase_distance(prev, next, distance);
18634 if (insn_uses_reg_mem (regno, next))
18636 /* Return DISTANCE if OP0 is used in memory
18637 address in NEXT. */
18638 *found = true;
18639 return distance;
18642 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18644 /* Return -1 if OP0 is set in NEXT. */
18645 *redefined = true;
18646 return -1;
18649 prev = next;
18652 if (next == BB_END (bb))
18653 break;
18655 next = NEXT_INSN (next);
18658 return distance;
18661 /* Return the distance between INSN and the next insn that uses
18662    register number REGNO0 in a memory address.  Return -1 if no such
18663    use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
18665 static int
18666 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18668 basic_block bb = BLOCK_FOR_INSN (insn);
18669 int distance = 0;
18670 bool found = false;
18671 bool redefined = false;
18673 if (insn != BB_END (bb))
18674 distance = distance_agu_use_in_bb (regno0, insn, distance,
18675 NEXT_INSN (insn),
18676 &found, &redefined);
18678 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18680 edge e;
18681 edge_iterator ei;
18682 bool simple_loop = false;
18684 FOR_EACH_EDGE (e, ei, bb->succs)
18685 if (e->dest == bb)
18687 simple_loop = true;
18688 break;
18691 if (simple_loop)
18692 distance = distance_agu_use_in_bb (regno0, insn,
18693 distance, BB_HEAD (bb),
18694 &found, &redefined);
18695 else
18697 int shortest_dist = -1;
18698 bool found_in_bb = false;
18699 bool redefined_in_bb = false;
18701 FOR_EACH_EDGE (e, ei, bb->succs)
18703 int bb_dist
18704 = distance_agu_use_in_bb (regno0, insn,
18705 distance, BB_HEAD (e->dest),
18706 &found_in_bb, &redefined_in_bb);
18707 if (found_in_bb)
18709 if (shortest_dist < 0)
18710 shortest_dist = bb_dist;
18711 else if (bb_dist > 0)
18712 shortest_dist = MIN (bb_dist, shortest_dist);
18714 found = true;
18718 distance = shortest_dist;
18722 if (!found || redefined)
18723 return -1;
18725 return distance >> 1;
18728 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
18729    there is a dilemma of choosing LEA or ADD.
18730    Negative value: ADD is preferred over LEA.
18731    Zero: Neutral.
18732    Positive value: LEA is preferred over ADD.  */
18733 #define IX86_LEA_PRIORITY 0
18735 /* Return true if using the lea INSN has a performance advantage
18736    over a sequence of instructions.  The instruction sequence has
18737    SPLIT_COST cycles higher latency than the lea latency.  */
18739 static bool
18740 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18741 unsigned int regno2, int split_cost, bool has_scale)
18743 int dist_define, dist_use;
18745   /* For Silvermont, if a 2-source or 3-source LEA is used for a
18746      non-destructive destination, or for the ability to use SCALE,
18747      the use of LEA is justified.  */
18748 if (TARGET_SILVERMONT || TARGET_INTEL)
18750 if (has_scale)
18751 return true;
18752 if (split_cost < 1)
18753 return false;
18754 if (regno0 == regno1 || regno0 == regno2)
18755 return false;
18756 return true;
18759 dist_define = distance_non_agu_define (regno1, regno2, insn);
18760 dist_use = distance_agu_use (regno0, insn);
18762 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18764       /* If there is no non-AGU operand definition, no AGU
18765 operand usage and the split cost is 0, then both the lea
18766 and non-lea variants have the same priority.  Currently
18767 we prefer lea for 64-bit code and non-lea for 32-bit
18768 code.  */
18769 if (dist_use < 0 && split_cost == 0)
18770 return TARGET_64BIT || IX86_LEA_PRIORITY;
18771 else
18772 return true;
18775   /* The longer the definition distance, the more preferable lea is.
18776      Here we adjust it to take into account the splitting cost and
18777      lea priority.  */
18778 dist_define += split_cost + IX86_LEA_PRIORITY;
18780   /* If there is no use in a memory address then we just check
18781      that the split cost exceeds the AGU stall.  */
18782 if (dist_use < 0)
18783 return dist_define > LEA_MAX_STALL;
18785   /* If this insn has both a backward non-AGU dependence and a forward
18786      AGU dependence, the one with the shorter distance takes effect.  */
18787 return dist_define >= dist_use;
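/* A rough example of the final heuristic (the half-cycle counts are
   illustrative only): if the nearest non-AGU definition of a source is
   1 half-cycle back (dist_define = 1) and the result feeds an address
   4 half-cycles ahead (dist_use = 4), then with split_cost = 1 we
   compare 1 + 1 + 0 >= 4, which is false, so the lea is split.  If
   instead dist_define = 2 and dist_use = 2 with split_cost = 0, we
   compare 2 >= 2 and the lea is kept.  */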
18790 /* Return true if it is legal to clobber flags by INSN and
18791 false otherwise. */
18793 static bool
18794 ix86_ok_to_clobber_flags (rtx_insn *insn)
18796 basic_block bb = BLOCK_FOR_INSN (insn);
18797 df_ref use;
18798 bitmap live;
18800 while (insn)
18802 if (NONDEBUG_INSN_P (insn))
18804 FOR_EACH_INSN_USE (use, insn)
18805 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18806 return false;
18808 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18809 return true;
18812 if (insn == BB_END (bb))
18813 break;
18815 insn = NEXT_INSN (insn);
18818 live = df_get_live_out(bb);
18819 return !REGNO_REG_SET_P (live, FLAGS_REG);
18822 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18823 move and add to avoid AGU stalls. */
18825 bool
18826 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18828 unsigned int regno0, regno1, regno2;
18830 /* Check if we need to optimize. */
18831 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18832 return false;
18834 /* Check it is correct to split here. */
18835 if (!ix86_ok_to_clobber_flags(insn))
18836 return false;
18838 regno0 = true_regnum (operands[0]);
18839 regno1 = true_regnum (operands[1]);
18840 regno2 = true_regnum (operands[2]);
18842   /* We need to split only adds with a non-destructive
18843      destination operand.  */
18844 if (regno0 == regno1 || regno0 == regno2)
18845 return false;
18846 else
18847 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18850 /* Return true if we should emit lea instruction instead of mov
18851 instruction. */
18853 bool
18854 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18856 unsigned int regno0, regno1;
18858 /* Check if we need to optimize. */
18859 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18860 return false;
18862 /* Use lea for reg to reg moves only. */
18863 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18864 return false;
18866 regno0 = true_regnum (operands[0]);
18867 regno1 = true_regnum (operands[1]);
18869 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18872 /* Return true if we need to split lea into a sequence of
18873 instructions to avoid AGU stalls. */
18875 bool
18876 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18878 unsigned int regno0, regno1, regno2;
18879 int split_cost;
18880 struct ix86_address parts;
18881 int ok;
18883 /* Check we need to optimize. */
18884 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18885 return false;
18887 /* The "at least two components" test below might not catch simple
18888 move or zero extension insns if parts.base is non-NULL and parts.disp
18889 is const0_rtx as the only components in the address, e.g. if the
18890 register is %rbp or %r13. As this test is much cheaper and moves or
18891 zero extensions are the common case, do this check first. */
18892 if (REG_P (operands[1])
18893 || (SImode_address_operand (operands[1], VOIDmode)
18894 && REG_P (XEXP (operands[1], 0))))
18895 return false;
18897 /* Check if it is OK to split here. */
18898 if (!ix86_ok_to_clobber_flags (insn))
18899 return false;
18901 ok = ix86_decompose_address (operands[1], &parts);
18902 gcc_assert (ok);
18904 /* There should be at least two components in the address. */
18905 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18906 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18907 return false;
18909   /* We should not split into add if a non-legitimate PIC
18910      operand is used as the displacement.  */
18911 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18912 return false;
18914   regno0 = true_regnum (operands[0]);
18915 regno1 = INVALID_REGNUM;
18916 regno2 = INVALID_REGNUM;
18918 if (parts.base)
18919 regno1 = true_regnum (parts.base);
18920 if (parts.index)
18921 regno2 = true_regnum (parts.index);
18923 split_cost = 0;
18925   /* Compute how many cycles we will add to the execution time
18926      if we split the lea into a sequence of instructions.  */
18927 if (parts.base || parts.index)
18929       /* Have to use a mov instruction if the non-destructive
18930 destination form is used.  */
18931 if (regno1 != regno0 && regno2 != regno0)
18932 split_cost += 1;
18934 /* Have to add index to base if both exist. */
18935 if (parts.base && parts.index)
18936 split_cost += 1;
18938 /* Have to use shift and adds if scale is 2 or greater. */
18939 if (parts.scale > 1)
18941 if (regno0 != regno1)
18942 split_cost += 1;
18943 else if (regno2 == regno0)
18944 split_cost += 4;
18945 else
18946 split_cost += parts.scale;
18949       /* Have to use an add instruction with an immediate if
18950 disp is nonzero.  */
18951 if (parts.disp && parts.disp != const0_rtx)
18952 split_cost += 1;
18954 /* Subtract the price of lea. */
18955 split_cost -= 1;
18958 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18959 parts.scale > 1);
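/* Worked example (register assignment and costs are illustrative): for
   lea 0x4(%ebx,%ecx,2), %eax the destination differs from both base and
   index, so a mov is needed (+1), base and index must be added (+1), the
   scale needs a shift (+1) and the displacement an add (+1); subtracting
   the lea itself (-1) gives split_cost = 3, which ix86_lea_outperforms
   then weighs against the possible AGU stall.  */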
18962 /* Emit the x86 binary operation CODE in mode MODE, where the first operand
18963    matches the destination.  The emitted RTX includes a clobber of FLAGS_REG.  */
18965 static void
18966 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18967 rtx dst, rtx src)
18969 rtx op, clob;
18971 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
18972 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18974 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
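/* For example, ix86_emit_binop (PLUS, SImode, dst, src) emits roughly
     (parallel [(set (reg:SI dst) (plus:SI (reg:SI dst) (reg:SI src)))
                (clobber (reg:CC flags))])
   i.e. the two-address form with the flags clobber that the add and
   shift patterns expect.  */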
18977 /* Return true if the definition of regno1 is nearest to the insn.  */
18979 static bool
18980 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18982 rtx_insn *prev = insn;
18983 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18985 if (insn == start)
18986 return false;
18987 while (prev && prev != start)
18989 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18991 prev = PREV_INSN (prev);
18992 continue;
18994 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18995 return true;
18996 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18997 return false;
18998 prev = PREV_INSN (prev);
19001 /* None of the regs is defined in the bb. */
19002 return false;
19005 /* Split lea instructions into a sequence of instructions
19006    which are executed on the ALU to avoid AGU stalls.
19007    It is assumed that it is allowed to clobber the flags register
19008    at the lea position.  */
19010 void
19011 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
19013 unsigned int regno0, regno1, regno2;
19014 struct ix86_address parts;
19015 rtx target, tmp;
19016 int ok, adds;
19018 ok = ix86_decompose_address (operands[1], &parts);
19019 gcc_assert (ok);
19021 target = gen_lowpart (mode, operands[0]);
19023 regno0 = true_regnum (target);
19024 regno1 = INVALID_REGNUM;
19025 regno2 = INVALID_REGNUM;
19027 if (parts.base)
19029 parts.base = gen_lowpart (mode, parts.base);
19030 regno1 = true_regnum (parts.base);
19033 if (parts.index)
19035 parts.index = gen_lowpart (mode, parts.index);
19036 regno2 = true_regnum (parts.index);
19039 if (parts.disp)
19040 parts.disp = gen_lowpart (mode, parts.disp);
19042 if (parts.scale > 1)
19044 /* Case r1 = r1 + ... */
19045 if (regno1 == regno0)
19047  /* If we have the case r1 = r1 + C * r2 then we
19048     would have to use multiplication, which is very
19049     expensive.  Assume the cost model is wrong if we
19050     reach such a case here.  */
19051 gcc_assert (regno2 != regno0);
19053 for (adds = parts.scale; adds > 0; adds--)
19054 ix86_emit_binop (PLUS, mode, target, parts.index);
19056 else
19058 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
19059 if (regno0 != regno2)
19060 emit_insn (gen_rtx_SET (target, parts.index));
19062 /* Use shift for scaling. */
19063 ix86_emit_binop (ASHIFT, mode, target,
19064 GEN_INT (exact_log2 (parts.scale)));
19066 if (parts.base)
19067 ix86_emit_binop (PLUS, mode, target, parts.base);
19069 if (parts.disp && parts.disp != const0_rtx)
19070 ix86_emit_binop (PLUS, mode, target, parts.disp);
19073 else if (!parts.base && !parts.index)
19075 gcc_assert(parts.disp);
19076 emit_insn (gen_rtx_SET (target, parts.disp));
19078 else
19080 if (!parts.base)
19082 if (regno0 != regno2)
19083 emit_insn (gen_rtx_SET (target, parts.index));
19085 else if (!parts.index)
19087 if (regno0 != regno1)
19088 emit_insn (gen_rtx_SET (target, parts.base));
19090 else
19092 if (regno0 == regno1)
19093 tmp = parts.index;
19094 else if (regno0 == regno2)
19095 tmp = parts.base;
19096 else
19098 rtx tmp1;
19100 /* Find better operand for SET instruction, depending
19101 on which definition is farther from the insn. */
19102 if (find_nearest_reg_def (insn, regno1, regno2))
19103 tmp = parts.index, tmp1 = parts.base;
19104 else
19105 tmp = parts.base, tmp1 = parts.index;
19107 emit_insn (gen_rtx_SET (target, tmp));
19109 if (parts.disp && parts.disp != const0_rtx)
19110 ix86_emit_binop (PLUS, mode, target, parts.disp);
19112 ix86_emit_binop (PLUS, mode, target, tmp1);
19113 return;
19116 ix86_emit_binop (PLUS, mode, target, tmp);
19119 if (parts.disp && parts.disp != const0_rtx)
19120 ix86_emit_binop (PLUS, mode, target, parts.disp);
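/* Illustrative result (assuming SImode and distinct registers): splitting
   lea 0x4(%ebx,%ecx,2), %eax yields roughly
     mov %ecx, %eax
     shl $1, %eax
     add %ebx, %eax
     add $4, %eax
   following the scale > 1, regno1 != regno0 path above.  */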
19124 /* Return true if it is ok to optimize an ADD operation to an LEA
19125    operation to avoid flag register consumption.  For most processors,
19126    ADD is faster than LEA.  For processors like BONNELL, if the
19127    destination register of the LEA holds an actual address which will be
19128    used soon, LEA is better, and otherwise ADD is better.  */
19130 bool
19131 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19133 unsigned int regno0 = true_regnum (operands[0]);
19134 unsigned int regno1 = true_regnum (operands[1]);
19135 unsigned int regno2 = true_regnum (operands[2]);
19137 /* If a = b + c, (a!=b && a!=c), must use lea form. */
19138 if (regno0 != regno1 && regno0 != regno2)
19139 return true;
19141 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19142 return false;
19144 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19147 /* Return true if destination reg of SET_BODY is shift count of
19148 USE_BODY. */
19150 static bool
19151 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19153 rtx set_dest;
19154 rtx shift_rtx;
19155 int i;
19157 /* Retrieve destination of SET_BODY. */
19158 switch (GET_CODE (set_body))
19160 case SET:
19161 set_dest = SET_DEST (set_body);
19162 if (!set_dest || !REG_P (set_dest))
19163 return false;
19164 break;
19165 case PARALLEL:
19166 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19167 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19168 use_body))
19169 return true;
19170 default:
19171 return false;
19172 break;
19175 /* Retrieve shift count of USE_BODY. */
19176 switch (GET_CODE (use_body))
19178 case SET:
19179 shift_rtx = XEXP (use_body, 1);
19180 break;
19181 case PARALLEL:
19182 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19183 if (ix86_dep_by_shift_count_body (set_body,
19184 XVECEXP (use_body, 0, i)))
19185 return true;
19186 default:
19187 return false;
19188 break;
19191 if (shift_rtx
19192 && (GET_CODE (shift_rtx) == ASHIFT
19193 || GET_CODE (shift_rtx) == LSHIFTRT
19194 || GET_CODE (shift_rtx) == ASHIFTRT
19195 || GET_CODE (shift_rtx) == ROTATE
19196 || GET_CODE (shift_rtx) == ROTATERT))
19198 rtx shift_count = XEXP (shift_rtx, 1);
19200 /* Return true if shift count is dest of SET_BODY. */
19201 if (REG_P (shift_count))
19203  /* Add this check since it can be invoked before register
19204     allocation by the pre-reload scheduler.  */
19205 if (reload_completed
19206 && true_regnum (set_dest) == true_regnum (shift_count))
19207 return true;
19208 else if (REGNO(set_dest) == REGNO(shift_count))
19209 return true;
19213 return false;
19216 /* Return true if destination reg of SET_INSN is shift count of
19217 USE_INSN. */
19219 bool
19220 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19222 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19223 PATTERN (use_insn));
19226 /* Return TRUE or FALSE depending on whether the unary operator meets the
19227 appropriate constraints. */
19229 bool
19230 ix86_unary_operator_ok (enum rtx_code,
19231 machine_mode,
19232 rtx operands[2])
19234 /* If one of operands is memory, source and destination must match. */
19235 if ((MEM_P (operands[0])
19236 || MEM_P (operands[1]))
19237 && ! rtx_equal_p (operands[0], operands[1]))
19238 return false;
19239 return true;
19242 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19243 are ok, keeping in mind the possible movddup alternative. */
19245 bool
19246 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19248 if (MEM_P (operands[0]))
19249 return rtx_equal_p (operands[0], operands[1 + high]);
19250 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19251 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19252 return true;
19255 /* Post-reload splitter for converting an SF or DFmode value in an
19256 SSE register into an unsigned SImode. */
19258 void
19259 ix86_split_convert_uns_si_sse (rtx operands[])
19261 machine_mode vecmode;
19262 rtx value, large, zero_or_two31, input, two31, x;
19264 large = operands[1];
19265 zero_or_two31 = operands[2];
19266 input = operands[3];
19267 two31 = operands[4];
19268 vecmode = GET_MODE (large);
19269 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19271 /* Load up the value into the low element. We must ensure that the other
19272 elements are valid floats -- zero is the easiest such value. */
19273 if (MEM_P (input))
19275 if (vecmode == V4SFmode)
19276 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19277 else
19278 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19280 else
19282 input = gen_rtx_REG (vecmode, REGNO (input));
19283 emit_move_insn (value, CONST0_RTX (vecmode));
19284 if (vecmode == V4SFmode)
19285 emit_insn (gen_sse_movss (value, value, input));
19286 else
19287 emit_insn (gen_sse2_movsd (value, value, input));
19290 emit_move_insn (large, two31);
19291 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19293 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19294 emit_insn (gen_rtx_SET (large, x));
19296 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19297 emit_insn (gen_rtx_SET (zero_or_two31, x));
19299 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19300 emit_insn (gen_rtx_SET (value, x));
19302 large = gen_rtx_REG (V4SImode, REGNO (large));
19303 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19305 x = gen_rtx_REG (V4SImode, REGNO (value));
19306 if (vecmode == V4SFmode)
19307 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19308 else
19309 emit_insn (gen_sse2_cvttpd2dq (x, value));
19310 value = x;
19312 emit_insn (gen_xorv4si3 (value, value, large));
19315 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19316 Expects the 64-bit DImode to be supplied in a pair of integral
19317 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19318 -mfpmath=sse, !optimize_size only. */
19320 void
19321 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19323 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19324 rtx int_xmm, fp_xmm;
19325 rtx biases, exponents;
19326 rtx x;
19328 int_xmm = gen_reg_rtx (V4SImode);
19329 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19330 emit_insn (gen_movdi_to_sse (int_xmm, input));
19331 else if (TARGET_SSE_SPLIT_REGS)
19333 emit_clobber (int_xmm);
19334 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19336 else
19338 x = gen_reg_rtx (V2DImode);
19339 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19340 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19343 x = gen_rtx_CONST_VECTOR (V4SImode,
19344 gen_rtvec (4, GEN_INT (0x43300000UL),
19345 GEN_INT (0x45300000UL),
19346 const0_rtx, const0_rtx));
19347 exponents = validize_mem (force_const_mem (V4SImode, x));
19349 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19350 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19352 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19353 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19354 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19355 (0x1.0p84 + double(fp_value_hi_xmm)).
19356 Note these exponents differ by 32. */
19358 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19360 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19361 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19362 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19363 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19364 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19365 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19366 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19367 biases = validize_mem (force_const_mem (V2DFmode, biases));
19368 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19370 /* Add the upper and lower DFmode values together. */
19371 if (TARGET_SSE3)
19372 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19373 else
19375 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19376 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19377 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19380 ix86_expand_vector_extract (false, target, fp_xmm, 0);
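/* A worked instance of the bias trick above (values chosen only for
   illustration): for input 0x100000005 (hi = 1, lo = 5) the interleave
   produces the doubles 0x1.0p52 + 5 and 0x1.0p84 + 1*0x1.0p32;
   subtracting the 0x1.0p52 and 0x1.0p84 biases leaves 5.0 and
   4294967296.0, and the final add gives 4294967301.0, i.e.
   (double) 0x100000005.  */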
19383 /* Not used, but eases macroization of patterns. */
19384 void
19385 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19387 gcc_unreachable ();
19390 /* Convert an unsigned SImode value into a DFmode. Only currently used
19391 for SSE, but applicable anywhere. */
19393 void
19394 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19396 REAL_VALUE_TYPE TWO31r;
19397 rtx x, fp;
19399 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19400 NULL, 1, OPTAB_DIRECT);
19402 fp = gen_reg_rtx (DFmode);
19403 emit_insn (gen_floatsidf2 (fp, x));
19405 real_ldexp (&TWO31r, &dconst1, 31);
19406 x = const_double_from_real_value (TWO31r, DFmode);
19408 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19409 if (x != target)
19410 emit_move_insn (target, x);
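/* For illustration: input 0xffffffff wraps to 0x7fffffff after adding
   -2147483648, floats to 2147483647.0, and adding TWO31r (2147483648.0)
   restores 4294967295.0; a small input like 5 goes through -2147483643.0
   and comes back to 5.0 the same way.  */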
19413 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19414 32-bit mode; otherwise we have a direct convert instruction. */
19416 void
19417 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19419 REAL_VALUE_TYPE TWO32r;
19420 rtx fp_lo, fp_hi, x;
19422 fp_lo = gen_reg_rtx (DFmode);
19423 fp_hi = gen_reg_rtx (DFmode);
19425 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19427 real_ldexp (&TWO32r, &dconst1, 32);
19428 x = const_double_from_real_value (TWO32r, DFmode);
19429 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19431 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19433 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19434 0, OPTAB_DIRECT);
19435 if (x != target)
19436 emit_move_insn (target, x);
19439 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19440 For x86_32, -mfpmath=sse, !optimize_size only. */
19441 void
19442 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19444 REAL_VALUE_TYPE ONE16r;
19445 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19447 real_ldexp (&ONE16r, &dconst1, 16);
19448 x = const_double_from_real_value (ONE16r, SFmode);
19449 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19450 NULL, 0, OPTAB_DIRECT);
19451 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19452 NULL, 0, OPTAB_DIRECT);
19453 fp_hi = gen_reg_rtx (SFmode);
19454 fp_lo = gen_reg_rtx (SFmode);
19455 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19456 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19457 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19458 0, OPTAB_DIRECT);
19459 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19460 0, OPTAB_DIRECT);
19461 if (!rtx_equal_p (target, fp_hi))
19462 emit_move_insn (target, fp_hi);
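/* For illustration: 0x00030005 splits into int_hi = 3 and int_lo = 5,
   which convert exactly; 3.0f * 65536.0f + 5.0f gives 196613.0f.  Since
   the multiply is exact, the single final add is the only rounding step,
   so the result matches a correctly rounded unsigned conversion.  */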
19465 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19466 a vector of unsigned ints VAL to vector of floats TARGET. */
19468 void
19469 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19471 rtx tmp[8];
19472 REAL_VALUE_TYPE TWO16r;
19473 machine_mode intmode = GET_MODE (val);
19474 machine_mode fltmode = GET_MODE (target);
19475 rtx (*cvt) (rtx, rtx);
19477 if (intmode == V4SImode)
19478 cvt = gen_floatv4siv4sf2;
19479 else
19480 cvt = gen_floatv8siv8sf2;
19481 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19482 tmp[0] = force_reg (intmode, tmp[0]);
19483 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19484 OPTAB_DIRECT);
19485 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19486 NULL_RTX, 1, OPTAB_DIRECT);
19487 tmp[3] = gen_reg_rtx (fltmode);
19488 emit_insn (cvt (tmp[3], tmp[1]));
19489 tmp[4] = gen_reg_rtx (fltmode);
19490 emit_insn (cvt (tmp[4], tmp[2]));
19491 real_ldexp (&TWO16r, &dconst1, 16);
19492 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19493 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19494 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19495 OPTAB_DIRECT);
19496 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19497 OPTAB_DIRECT);
19498 if (tmp[7] != target)
19499 emit_move_insn (target, tmp[7]);
19502 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19503 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19504 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19505 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19508 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19510 REAL_VALUE_TYPE TWO31r;
19511 rtx two31r, tmp[4];
19512 machine_mode mode = GET_MODE (val);
19513 machine_mode scalarmode = GET_MODE_INNER (mode);
19514 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19515 rtx (*cmp) (rtx, rtx, rtx, rtx);
19516 int i;
19518 for (i = 0; i < 3; i++)
19519 tmp[i] = gen_reg_rtx (mode);
19520 real_ldexp (&TWO31r, &dconst1, 31);
19521 two31r = const_double_from_real_value (TWO31r, scalarmode);
19522 two31r = ix86_build_const_vector (mode, 1, two31r);
19523 two31r = force_reg (mode, two31r);
19524 switch (mode)
19526 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19527 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19528 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19529 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19530 default: gcc_unreachable ();
19532 tmp[3] = gen_rtx_LE (mode, two31r, val);
19533 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19534 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19535 0, OPTAB_DIRECT);
19536 if (intmode == V4SImode || TARGET_AVX2)
19537 *xorp = expand_simple_binop (intmode, ASHIFT,
19538 gen_lowpart (intmode, tmp[0]),
19539 GEN_INT (31), NULL_RTX, 0,
19540 OPTAB_DIRECT);
19541 else
19543 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19544 two31 = ix86_build_const_vector (intmode, 1, two31);
19545 *xorp = expand_simple_binop (intmode, AND,
19546 gen_lowpart (intmode, tmp[0]),
19547 two31, NULL_RTX, 0,
19548 OPTAB_DIRECT);
19550 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19551 0, OPTAB_DIRECT);
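/* For illustration: a lane holding 3e9 (>= 0x1p31) compares true, so
   0x1p31 is subtracted before the signed truncation and the matching
   lane of *XORP becomes 0x80000000; xoring that back into the truncated
   result sets the top bit, i.e. adds 2^31 to recover the unsigned value.
   A lane below 0x1p31 is left untouched and its *XORP lane is zero.  */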
19554 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19555 then replicate the value for all elements of the vector
19556 register. */
19559 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19561 int i, n_elt;
19562 rtvec v;
19563 machine_mode scalar_mode;
19565 switch (mode)
19567 case V64QImode:
19568 case V32QImode:
19569 case V16QImode:
19570 case V32HImode:
19571 case V16HImode:
19572 case V8HImode:
19573 case V16SImode:
19574 case V8SImode:
19575 case V4SImode:
19576 case V8DImode:
19577 case V4DImode:
19578 case V2DImode:
19579 gcc_assert (vect);
19580 case V16SFmode:
19581 case V8SFmode:
19582 case V4SFmode:
19583 case V8DFmode:
19584 case V4DFmode:
19585 case V2DFmode:
19586 n_elt = GET_MODE_NUNITS (mode);
19587 v = rtvec_alloc (n_elt);
19588 scalar_mode = GET_MODE_INNER (mode);
19590 RTVEC_ELT (v, 0) = value;
19592 for (i = 1; i < n_elt; ++i)
19593 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19595 return gen_rtx_CONST_VECTOR (mode, v);
19597 default:
19598 gcc_unreachable ();
19602 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19603 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19604 for an SSE register. If VECT is true, then replicate the mask for
19605 all elements of the vector register. If INVERT is true, then create
19606 a mask excluding the sign bit. */
19609 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19611 machine_mode vec_mode, imode;
19612 wide_int w;
19613 rtx mask, v;
19615 switch (mode)
19617 case V16SImode:
19618 case V16SFmode:
19619 case V8SImode:
19620 case V4SImode:
19621 case V8SFmode:
19622 case V4SFmode:
19623 vec_mode = mode;
19624 mode = GET_MODE_INNER (mode);
19625 imode = SImode;
19626 break;
19628 case V8DImode:
19629 case V4DImode:
19630 case V2DImode:
19631 case V8DFmode:
19632 case V4DFmode:
19633 case V2DFmode:
19634 vec_mode = mode;
19635 mode = GET_MODE_INNER (mode);
19636 imode = DImode;
19637 break;
19639 case TImode:
19640 case TFmode:
19641 vec_mode = VOIDmode;
19642 imode = TImode;
19643 break;
19645 default:
19646 gcc_unreachable ();
19649 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (mode) - 1,
19650 GET_MODE_BITSIZE (mode));
19651 if (invert)
19652 w = wi::bit_not (w);
19654 /* Force this value into the low part of a fp vector constant. */
19655 mask = immed_wide_int_const (w, imode);
19656 mask = gen_lowpart (mode, mask);
19658 if (vec_mode == VOIDmode)
19659 return force_reg (mode, mask);
19661 v = ix86_build_const_vector (vec_mode, vect, mask);
19662 return force_reg (vec_mode, v);
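/* For example, ix86_build_signbit_mask (V4SFmode, true, false) yields a
   vector register holding 0x80000000 in every SImode lane (four copies
   of -0.0f), while the INVERT form produces 0x7fffffff lanes, suitable
   for masking the sign bit off with an AND.  */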
19665 /* Generate code for floating point ABS or NEG. */
19667 void
19668 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19669 rtx operands[])
19671 rtx mask, set, dst, src;
19672 bool use_sse = false;
19673 bool vector_mode = VECTOR_MODE_P (mode);
19674 machine_mode vmode = mode;
19676 if (vector_mode)
19677 use_sse = true;
19678 else if (mode == TFmode)
19679 use_sse = true;
19680 else if (TARGET_SSE_MATH)
19682 use_sse = SSE_FLOAT_MODE_P (mode);
19683 if (mode == SFmode)
19684 vmode = V4SFmode;
19685 else if (mode == DFmode)
19686 vmode = V2DFmode;
19689 /* NEG and ABS performed with SSE use bitwise mask operations.
19690 Create the appropriate mask now. */
19691 if (use_sse)
19692 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19693 else
19694 mask = NULL_RTX;
19696 dst = operands[0];
19697 src = operands[1];
19699 set = gen_rtx_fmt_e (code, mode, src);
19700 set = gen_rtx_SET (dst, set);
19702 if (mask)
19704 rtx use, clob;
19705 rtvec par;
19707 use = gen_rtx_USE (VOIDmode, mask);
19708 if (vector_mode)
19709 par = gen_rtvec (2, set, use);
19710 else
19712 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19713 par = gen_rtvec (3, set, use, clob);
19715 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19717 else
19718 emit_insn (set);
19721 /* Expand a copysign operation. Special case operand 0 being a constant. */
19723 void
19724 ix86_expand_copysign (rtx operands[])
19726 machine_mode mode, vmode;
19727 rtx dest, op0, op1, mask, nmask;
19729 dest = operands[0];
19730 op0 = operands[1];
19731 op1 = operands[2];
19733 mode = GET_MODE (dest);
19735 if (mode == SFmode)
19736 vmode = V4SFmode;
19737 else if (mode == DFmode)
19738 vmode = V2DFmode;
19739 else
19740 vmode = mode;
19742 if (CONST_DOUBLE_P (op0))
19744 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19746 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19747 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19749 if (mode == SFmode || mode == DFmode)
19751 if (op0 == CONST0_RTX (mode))
19752 op0 = CONST0_RTX (vmode);
19753 else
19755 rtx v = ix86_build_const_vector (vmode, false, op0);
19757 op0 = force_reg (vmode, v);
19760 else if (op0 != CONST0_RTX (mode))
19761 op0 = force_reg (mode, op0);
19763 mask = ix86_build_signbit_mask (vmode, 0, 0);
19765 if (mode == SFmode)
19766 copysign_insn = gen_copysignsf3_const;
19767 else if (mode == DFmode)
19768 copysign_insn = gen_copysigndf3_const;
19769 else
19770 copysign_insn = gen_copysigntf3_const;
19772 emit_insn (copysign_insn (dest, op0, op1, mask));
19774 else
19776 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19778 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19779 mask = ix86_build_signbit_mask (vmode, 0, 0);
19781 if (mode == SFmode)
19782 copysign_insn = gen_copysignsf3_var;
19783 else if (mode == DFmode)
19784 copysign_insn = gen_copysigndf3_var;
19785 else
19786 copysign_insn = gen_copysigntf3_var;
19788 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19792 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19793 be a constant, and so has already been expanded into a vector constant. */
19795 void
19796 ix86_split_copysign_const (rtx operands[])
19798 machine_mode mode, vmode;
19799 rtx dest, op0, mask, x;
19801 dest = operands[0];
19802 op0 = operands[1];
19803 mask = operands[3];
19805 mode = GET_MODE (dest);
19806 vmode = GET_MODE (mask);
19808 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19809 x = gen_rtx_AND (vmode, dest, mask);
19810 emit_insn (gen_rtx_SET (dest, x));
19812 if (op0 != CONST0_RTX (vmode))
19814 x = gen_rtx_IOR (vmode, dest, op0);
19815 emit_insn (gen_rtx_SET (dest, x));
19819 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19820 so we have to do two masks. */
19822 void
19823 ix86_split_copysign_var (rtx operands[])
19825 machine_mode mode, vmode;
19826 rtx dest, scratch, op0, op1, mask, nmask, x;
19828 dest = operands[0];
19829 scratch = operands[1];
19830 op0 = operands[2];
19831 op1 = operands[3];
19832 nmask = operands[4];
19833 mask = operands[5];
19835 mode = GET_MODE (dest);
19836 vmode = GET_MODE (mask);
19838 if (rtx_equal_p (op0, op1))
19840 /* Shouldn't happen often (it's useless, obviously), but when it does
19841 we'd generate incorrect code if we continue below. */
19842 emit_move_insn (dest, op0);
19843 return;
19846 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19848 gcc_assert (REGNO (op1) == REGNO (scratch));
19850 x = gen_rtx_AND (vmode, scratch, mask);
19851 emit_insn (gen_rtx_SET (scratch, x));
19853 dest = mask;
19854 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19855 x = gen_rtx_NOT (vmode, dest);
19856 x = gen_rtx_AND (vmode, x, op0);
19857 emit_insn (gen_rtx_SET (dest, x));
19859 else
19861 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19863 x = gen_rtx_AND (vmode, scratch, mask);
19865 else /* alternative 2,4 */
19867 gcc_assert (REGNO (mask) == REGNO (scratch));
19868 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19869 x = gen_rtx_AND (vmode, scratch, op1);
19871 emit_insn (gen_rtx_SET (scratch, x));
19873 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19875 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19876 x = gen_rtx_AND (vmode, dest, nmask);
19878 else /* alternative 3,4 */
19880 gcc_assert (REGNO (nmask) == REGNO (dest));
19881 dest = nmask;
19882 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19883 x = gen_rtx_AND (vmode, dest, op0);
19885 emit_insn (gen_rtx_SET (dest, x));
19888 x = gen_rtx_IOR (vmode, dest, scratch);
19889 emit_insn (gen_rtx_SET (dest, x));
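/* In effect both copysign splitters compute
     dest = (op0 & ~signmask) | (op1 & signmask)
   e.g. copysign (-3.0, 2.5) keeps the magnitude bits of -3.0, takes the
   sign bit of 2.5 and ORs them together into +3.0.  The register
   shuffling above mostly exists to satisfy the constraint alternatives.  */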
19892 /* Return TRUE or FALSE depending on whether the first SET in INSN
19893 has source and destination with matching CC modes, and that the
19894 CC mode is at least as constrained as REQ_MODE. */
19896 bool
19897 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19899 rtx set;
19900 machine_mode set_mode;
19902 set = PATTERN (insn);
19903 if (GET_CODE (set) == PARALLEL)
19904 set = XVECEXP (set, 0, 0);
19905 gcc_assert (GET_CODE (set) == SET);
19906 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19908 set_mode = GET_MODE (SET_DEST (set));
19909 switch (set_mode)
19911 case CCNOmode:
19912 if (req_mode != CCNOmode
19913 && (req_mode != CCmode
19914 || XEXP (SET_SRC (set), 1) != const0_rtx))
19915 return false;
19916 break;
19917 case CCmode:
19918 if (req_mode == CCGCmode)
19919 return false;
19920 /* FALLTHRU */
19921 case CCGCmode:
19922 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19923 return false;
19924 /* FALLTHRU */
19925 case CCGOCmode:
19926 if (req_mode == CCZmode)
19927 return false;
19928 /* FALLTHRU */
19929 case CCZmode:
19930 break;
19932 case CCAmode:
19933 case CCCmode:
19934 case CCOmode:
19935 case CCPmode:
19936 case CCSmode:
19937 if (set_mode != req_mode)
19938 return false;
19939 break;
19941 default:
19942 gcc_unreachable ();
19945 return GET_MODE (SET_SRC (set)) == set_mode;
19948 /* Generate insn patterns to do an integer compare of OPERANDS. */
19950 static rtx
19951 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19953 machine_mode cmpmode;
19954 rtx tmp, flags;
19956 cmpmode = SELECT_CC_MODE (code, op0, op1);
19957 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19959 /* This is very simple, but making the interface the same as in the
19960 FP case makes the rest of the code easier. */
19961 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19962 emit_insn (gen_rtx_SET (flags, tmp));
19964 /* Return the test that should be put into the flags user, i.e.
19965 the bcc, scc, or cmov instruction. */
19966 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19969 /* Figure out whether to use ordered or unordered fp comparisons.
19970 Return the appropriate mode to use. */
19972 machine_mode
19973 ix86_fp_compare_mode (enum rtx_code)
19975 /* ??? In order to make all comparisons reversible, we do all comparisons
19976    non-trapping when compiling for IEEE.  Once gcc is able to distinguish
19977    between all forms of trapping and nontrapping comparisons, we can make inequality
19978 comparisons trapping again, since it results in better code when using
19979 FCOM based compares. */
19980 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19983 machine_mode
19984 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19986 machine_mode mode = GET_MODE (op0);
19988 if (SCALAR_FLOAT_MODE_P (mode))
19990 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19991 return ix86_fp_compare_mode (code);
19994 switch (code)
19996 /* Only zero flag is needed. */
19997 case EQ: /* ZF=0 */
19998 case NE: /* ZF!=0 */
19999 return CCZmode;
20000 /* Codes needing carry flag. */
20001 case GEU: /* CF=0 */
20002 case LTU: /* CF=1 */
20003 /* Detect overflow checks. They need just the carry flag. */
20004 if (GET_CODE (op0) == PLUS
20005 && rtx_equal_p (op1, XEXP (op0, 0)))
20006 return CCCmode;
20007 else
20008 return CCmode;
20009 case GTU: /* CF=0 & ZF=0 */
20010 case LEU: /* CF=1 | ZF=1 */
20011 return CCmode;
20012 /* Codes possibly doable only with sign flag when
20013 comparing against zero. */
20014 case GE: /* SF=OF or SF=0 */
20015 case LT: /* SF<>OF or SF=1 */
20016 if (op1 == const0_rtx)
20017 return CCGOCmode;
20018 else
20019 /* For other cases Carry flag is not required. */
20020 return CCGCmode;
20021       /* Codes doable only with the sign flag when comparing
20022 against zero, but we lack a jump instruction for it,
20023 so we need to use relational tests against overflow,
20024 which thus needs to be zero.  */
20025 case GT: /* ZF=0 & SF=OF */
20026 case LE: /* ZF=1 | SF<>OF */
20027 if (op1 == const0_rtx)
20028 return CCNOmode;
20029 else
20030 return CCGCmode;
20031       /* The strcmp pattern does (use flags) and combine may ask us for the
20032 proper mode.  */
20033 case USE:
20034 return CCmode;
20035 default:
20036 gcc_unreachable ();
20040 /* Return the fixed registers used for condition codes. */
20042 static bool
20043 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
20045 *p1 = FLAGS_REG;
20046 *p2 = FPSR_REG;
20047 return true;
20050 /* If two condition code modes are compatible, return a condition code
20051 mode which is compatible with both. Otherwise, return
20052 VOIDmode. */
20054 static machine_mode
20055 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
20057 if (m1 == m2)
20058 return m1;
20060 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20061 return VOIDmode;
20063 if ((m1 == CCGCmode && m2 == CCGOCmode)
20064 || (m1 == CCGOCmode && m2 == CCGCmode))
20065 return CCGCmode;
20067 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20068 return m2;
20069 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20070 return m1;
20072 switch (m1)
20074 default:
20075 gcc_unreachable ();
20077 case CCmode:
20078 case CCGCmode:
20079 case CCGOCmode:
20080 case CCNOmode:
20081 case CCAmode:
20082 case CCCmode:
20083 case CCOmode:
20084 case CCPmode:
20085 case CCSmode:
20086 case CCZmode:
20087 switch (m2)
20089 default:
20090 return VOIDmode;
20092 case CCmode:
20093 case CCGCmode:
20094 case CCGOCmode:
20095 case CCNOmode:
20096 case CCAmode:
20097 case CCCmode:
20098 case CCOmode:
20099 case CCPmode:
20100 case CCSmode:
20101 case CCZmode:
20102 return CCmode;
20105 case CCFPmode:
20106 case CCFPUmode:
20107 /* These are only compatible with themselves, which we already
20108 checked above. */
20109 return VOIDmode;
20114 /* Return a comparison we can do that is equivalent to
20115    swap_condition (code), apart possibly from orderedness.
20116    But never change orderedness if TARGET_IEEE_FP, returning
20117    UNKNOWN in that case if necessary.  */
20119 static enum rtx_code
20120 ix86_fp_swap_condition (enum rtx_code code)
20122 switch (code)
20124 case GT: /* GTU - CF=0 & ZF=0 */
20125 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20126 case GE: /* GEU - CF=0 */
20127 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20128 case UNLT: /* LTU - CF=1 */
20129 return TARGET_IEEE_FP ? UNKNOWN : GT;
20130 case UNLE: /* LEU - CF=1 | ZF=1 */
20131 return TARGET_IEEE_FP ? UNKNOWN : GE;
20132 default:
20133 return swap_condition (code);
20137 /* Return the cost of comparison CODE using the best strategy for performance.
20138    All of the following functions use the number of instructions as a cost metric.
20139    In the future this should be tweaked to compute bytes for optimize_size and
20140    to take into account the performance of various instructions on various CPUs.  */
20142 static int
20143 ix86_fp_comparison_cost (enum rtx_code code)
20145 int arith_cost;
20147 /* The cost of code using bit-twiddling on %ah. */
20148 switch (code)
20150 case UNLE:
20151 case UNLT:
20152 case LTGT:
20153 case GT:
20154 case GE:
20155 case UNORDERED:
20156 case ORDERED:
20157 case UNEQ:
20158 arith_cost = 4;
20159 break;
20160 case LT:
20161 case NE:
20162 case EQ:
20163 case UNGE:
20164 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20165 break;
20166 case LE:
20167 case UNGT:
20168 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20169 break;
20170 default:
20171 gcc_unreachable ();
20174 switch (ix86_fp_comparison_strategy (code))
20176 case IX86_FPCMP_COMI:
20177 return arith_cost > 4 ? 3 : 2;
20178 case IX86_FPCMP_SAHF:
20179 return arith_cost > 4 ? 4 : 3;
20180 default:
20181 return arith_cost;
20185 /* Return the strategy to use for floating-point compares.  We assume that fcomi
20186    is always preferable where available, since that is also true when looking at
20187    size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
20189 enum ix86_fpcmp_strategy
20190 ix86_fp_comparison_strategy (enum rtx_code)
20192 /* Do fcomi/sahf based test when profitable. */
20194 if (TARGET_CMOVE)
20195 return IX86_FPCMP_COMI;
20197 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20198 return IX86_FPCMP_SAHF;
20200 return IX86_FPCMP_ARITH;
20203 /* Swap, force into registers, or otherwise massage the two operands
20204 to a fp comparison. The operands are updated in place; the new
20205 comparison code is returned. */
20207 static enum rtx_code
20208 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20210 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20211 rtx op0 = *pop0, op1 = *pop1;
20212 machine_mode op_mode = GET_MODE (op0);
20213 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20215 /* All of the unordered compare instructions only work on registers.
20216 The same is true of the fcomi compare instructions. The XFmode
20217 compare instructions require registers except when comparing
20218 against zero or when converting operand 1 from fixed point to
20219 floating point. */
20221 if (!is_sse
20222 && (fpcmp_mode == CCFPUmode
20223 || (op_mode == XFmode
20224 && ! (standard_80387_constant_p (op0) == 1
20225 || standard_80387_constant_p (op1) == 1)
20226 && GET_CODE (op1) != FLOAT)
20227 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20229 op0 = force_reg (op_mode, op0);
20230 op1 = force_reg (op_mode, op1);
20232 else
20234 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20235 things around if they appear profitable, otherwise force op0
20236 into a register. */
20238 if (standard_80387_constant_p (op0) == 0
20239 || (MEM_P (op0)
20240 && ! (standard_80387_constant_p (op1) == 0
20241 || MEM_P (op1))))
20243 enum rtx_code new_code = ix86_fp_swap_condition (code);
20244 if (new_code != UNKNOWN)
20246 std::swap (op0, op1);
20247 code = new_code;
20251 if (!REG_P (op0))
20252 op0 = force_reg (op_mode, op0);
20254 if (CONSTANT_P (op1))
20256 int tmp = standard_80387_constant_p (op1);
20257 if (tmp == 0)
20258 op1 = validize_mem (force_const_mem (op_mode, op1));
20259 else if (tmp == 1)
20261 if (TARGET_CMOVE)
20262 op1 = force_reg (op_mode, op1);
20264 else
20265 op1 = force_reg (op_mode, op1);
20269 /* Try to rearrange the comparison to make it cheaper. */
20270 if (ix86_fp_comparison_cost (code)
20271 > ix86_fp_comparison_cost (swap_condition (code))
20272 && (REG_P (op1) || can_create_pseudo_p ()))
20274 std::swap (op0, op1);
20275 code = swap_condition (code);
20276 if (!REG_P (op0))
20277 op0 = force_reg (op_mode, op0);
20280 *pop0 = op0;
20281 *pop1 = op1;
20282 return code;
20285 /* Convert the comparison codes we use to represent an FP comparison to the
20286    integer code that will result in a proper branch.  Return UNKNOWN if no
20287    such code is available.  */
20289 enum rtx_code
20290 ix86_fp_compare_code_to_integer (enum rtx_code code)
20292 switch (code)
20294 case GT:
20295 return GTU;
20296 case GE:
20297 return GEU;
20298 case ORDERED:
20299 case UNORDERED:
20300 return code;
20301 break;
20302 case UNEQ:
20303 return EQ;
20304 break;
20305 case UNLT:
20306 return LTU;
20307 break;
20308 case UNLE:
20309 return LEU;
20310 break;
20311 case LTGT:
20312 return NE;
20313 break;
20314 default:
20315 return UNKNOWN;
20319 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20321 static rtx
20322 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20324 machine_mode fpcmp_mode, intcmp_mode;
20325 rtx tmp, tmp2;
20327 fpcmp_mode = ix86_fp_compare_mode (code);
20328 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20330 /* Do fcomi/sahf based test when profitable. */
20331 switch (ix86_fp_comparison_strategy (code))
20333 case IX86_FPCMP_COMI:
20334 intcmp_mode = fpcmp_mode;
20335 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20336 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20337 emit_insn (tmp);
20338 break;
20340 case IX86_FPCMP_SAHF:
20341 intcmp_mode = fpcmp_mode;
20342 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20343 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20345 if (!scratch)
20346 scratch = gen_reg_rtx (HImode);
20347 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20348 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20349 break;
20351 case IX86_FPCMP_ARITH:
20352 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20353 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20354 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20355 if (!scratch)
20356 scratch = gen_reg_rtx (HImode);
20357 emit_insn (gen_rtx_SET (scratch, tmp2));
20359 /* In the unordered case, we have to check C2 for NaN's, which
20360 doesn't happen to work out to anything nice combination-wise.
20361 So do some bit twiddling on the value we've got in AH to come
20362 up with an appropriate set of condition codes. */
20364 intcmp_mode = CCNOmode;
20365 switch (code)
20367 case GT:
20368 case UNGT:
20369 if (code == GT || !TARGET_IEEE_FP)
20371 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20372 code = EQ;
20374 else
20376 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20377 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20378 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20379 intcmp_mode = CCmode;
20380 code = GEU;
20382 break;
20383 case LT:
20384 case UNLT:
20385 if (code == LT && TARGET_IEEE_FP)
20387 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20388 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20389 intcmp_mode = CCmode;
20390 code = EQ;
20392 else
20394 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20395 code = NE;
20397 break;
20398 case GE:
20399 case UNGE:
20400 if (code == GE || !TARGET_IEEE_FP)
20402 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20403 code = EQ;
20405 else
20407 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20408 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20409 code = NE;
20411 break;
20412 case LE:
20413 case UNLE:
20414 if (code == LE && TARGET_IEEE_FP)
20416 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20417 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20418 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20419 intcmp_mode = CCmode;
20420 code = LTU;
20422 else
20424 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20425 code = NE;
20427 break;
20428 case EQ:
20429 case UNEQ:
20430 if (code == EQ && TARGET_IEEE_FP)
20432 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20433 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20434 intcmp_mode = CCmode;
20435 code = EQ;
20437 else
20439 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20440 code = NE;
20442 break;
20443 case NE:
20444 case LTGT:
20445 if (code == NE && TARGET_IEEE_FP)
20447 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20448 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20449 GEN_INT (0x40)));
20450 code = NE;
20452 else
20454 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20455 code = EQ;
20457 break;
20459 case UNORDERED:
20460 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20461 code = NE;
20462 break;
20463 case ORDERED:
20464 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20465 code = EQ;
20466 break;
20468 default:
20469 gcc_unreachable ();
20471 break;
20473 default:
20474 gcc_unreachable();
20477 /* Return the test that should be put into the flags user, i.e.
20478 the bcc, scc, or cmov instruction. */
20479 return gen_rtx_fmt_ee (code, VOIDmode,
20480 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20481 const0_rtx);
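/* For reference on the bit tests above (assuming the usual x87 layout of
   FNSTSW in AH): 0x01 is C0, 0x04 is C2 and 0x40 is C3, so 0x45 selects
   C0|C2|C3.  After a compare, "greater" leaves all three clear, "less"
   sets C0, "equal" sets C3 and an unordered result sets all of them,
   which is why e.g. the GT case simply tests AH against 0x45 and
   branches when ZF is set.  */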
20484 static rtx
20485 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20487 rtx ret;
20489 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20490 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20492 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20494 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20495 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20497 else
20498 ret = ix86_expand_int_compare (code, op0, op1);
20500 return ret;
20503 void
20504 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20506 machine_mode mode = GET_MODE (op0);
20507 rtx tmp;
20509 switch (mode)
20511 case SFmode:
20512 case DFmode:
20513 case XFmode:
20514 case QImode:
20515 case HImode:
20516 case SImode:
20517 simple:
20518 tmp = ix86_expand_compare (code, op0, op1);
20519 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20520 gen_rtx_LABEL_REF (VOIDmode, label),
20521 pc_rtx);
20522 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
20523 return;
20525 case DImode:
20526 if (TARGET_64BIT)
20527 goto simple;
20528 case TImode:
20529 /* Expand DImode branch into multiple compare+branch. */
20531 rtx lo[2], hi[2];
20532 rtx_code_label *label2;
20533 enum rtx_code code1, code2, code3;
20534 machine_mode submode;
20536 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20538 std::swap (op0, op1);
20539 code = swap_condition (code);
20542 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20543 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20545 submode = mode == DImode ? SImode : DImode;
20547 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20548 avoid two branches. This costs one extra insn, so disable when
20549 optimizing for size. */
20551 if ((code == EQ || code == NE)
20552 && (!optimize_insn_for_size_p ()
20553 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20555 rtx xor0, xor1;
20557 xor1 = hi[0];
20558 if (hi[1] != const0_rtx)
20559 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20560 NULL_RTX, 0, OPTAB_WIDEN);
20562 xor0 = lo[0];
20563 if (lo[1] != const0_rtx)
20564 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20565 NULL_RTX, 0, OPTAB_WIDEN);
20567 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20568 NULL_RTX, 0, OPTAB_WIDEN);
20570 ix86_expand_branch (code, tmp, const0_rtx, label);
20571 return;
20574 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20575 op1 is a constant and the low word is zero, then we can just
20576 examine the high word. Similarly for low word -1 and
20577 less-or-equal-than or greater-than. */
20579 if (CONST_INT_P (hi[1]))
20580 switch (code)
20582 case LT: case LTU: case GE: case GEU:
20583 if (lo[1] == const0_rtx)
20585 ix86_expand_branch (code, hi[0], hi[1], label);
20586 return;
20588 break;
20589 case LE: case LEU: case GT: case GTU:
20590 if (lo[1] == constm1_rtx)
20592 ix86_expand_branch (code, hi[0], hi[1], label);
20593 return;
20595 break;
20596 default:
20597 break;
20600 /* Otherwise, we need two or three jumps. */
20602 label2 = gen_label_rtx ();
20604 code1 = code;
20605 code2 = swap_condition (code);
20606 code3 = unsigned_condition (code);
20608 switch (code)
20610 case LT: case GT: case LTU: case GTU:
20611 break;
20613 case LE: code1 = LT; code2 = GT; break;
20614 case GE: code1 = GT; code2 = LT; break;
20615 case LEU: code1 = LTU; code2 = GTU; break;
20616 case GEU: code1 = GTU; code2 = LTU; break;
20618 case EQ: code1 = UNKNOWN; code2 = NE; break;
20619 case NE: code2 = UNKNOWN; break;
20621 default:
20622 gcc_unreachable ();
20626 * a < b =>
20627 * if (hi(a) < hi(b)) goto true;
20628 * if (hi(a) > hi(b)) goto false;
20629 * if (lo(a) < lo(b)) goto true;
20630 * false:
20633 if (code1 != UNKNOWN)
20634 ix86_expand_branch (code1, hi[0], hi[1], label);
20635 if (code2 != UNKNOWN)
20636 ix86_expand_branch (code2, hi[0], hi[1], label2);
20638 ix86_expand_branch (code3, lo[0], lo[1], label);
20640 if (code2 != UNKNOWN)
20641 emit_label (label2);
20642 return;
20645 default:
20646 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20647 goto simple;
20651 /* Split branch based on floating point condition. */
20652 void
20653 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20654 rtx target1, rtx target2, rtx tmp)
20656 rtx condition;
20657 rtx i;
20659 if (target2 != pc_rtx)
20661 std::swap (target1, target2);
20662 code = reverse_condition_maybe_unordered (code);
20665 condition = ix86_expand_fp_compare (code, op1, op2,
20666 tmp);
20668 i = emit_jump_insn (gen_rtx_SET
20669 (pc_rtx,
20670 gen_rtx_IF_THEN_ELSE (VOIDmode,
20671 condition, target1, target2)));
20672 if (split_branch_probability >= 0)
20673 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20676 void
20677 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20679 rtx ret;
20681 gcc_assert (GET_MODE (dest) == QImode);
20683 ret = ix86_expand_compare (code, op0, op1);
20684 PUT_MODE (ret, QImode);
20685 emit_insn (gen_rtx_SET (dest, ret));
20688 /* Expand a comparison setting or clearing the carry flag. Return true when
20689 successful and store the comparison in *POP. */
20690 static bool
20691 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20693 machine_mode mode =
20694 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20696 /* Do not handle double-mode compares that go through special path. */
20697 if (mode == (TARGET_64BIT ? TImode : DImode))
20698 return false;
20700 if (SCALAR_FLOAT_MODE_P (mode))
20702 rtx compare_op;
20703 rtx_insn *compare_seq;
20705 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20707 /* Shortcut: the following common codes never translate
20708 into carry flag compares. */
20709 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20710 || code == ORDERED || code == UNORDERED)
20711 return false;
20713 /* These comparisons require the zero flag; swap the operands so they don't. */
20714 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20715 && !TARGET_IEEE_FP)
20717 std::swap (op0, op1);
20718 code = swap_condition (code);
20721 /* Try to expand the comparison and verify that we end up with
20722 a carry flag based comparison. This fails to be true only when
20723 we decide to expand the comparison using arithmetic, which is
20724 not a common scenario. */
20725 start_sequence ();
20726 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20727 compare_seq = get_insns ();
20728 end_sequence ();
20730 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20731 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20732 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20733 else
20734 code = GET_CODE (compare_op);
20736 if (code != LTU && code != GEU)
20737 return false;
20739 emit_insn (compare_seq);
20740 *pop = compare_op;
20741 return true;
20744 if (!INTEGRAL_MODE_P (mode))
20745 return false;
20747 switch (code)
20749 case LTU:
20750 case GEU:
20751 break;
20753 /* Convert a==0 into (unsigned)a<1 and a!=0 into (unsigned)a>=1. */
20754 case EQ:
20755 case NE:
20756 if (op1 != const0_rtx)
20757 return false;
20758 op1 = const1_rtx;
20759 code = (code == EQ ? LTU : GEU);
20760 break;
20762 /* Convert a>b into b<a or a>=b+1. */
20763 case GTU:
20764 case LEU:
20765 if (CONST_INT_P (op1))
20767 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20768 /* Bail out on overflow. We could still swap the operands, but that
20769 would force loading of the constant into a register. */
20770 if (op1 == const0_rtx
20771 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20772 return false;
20773 code = (code == GTU ? GEU : LTU);
20775 else
20777 std::swap (op0, op1);
20778 code = (code == GTU ? LTU : GEU);
20780 break;
20782 /* Convert a>=0 into (unsigned)a<0x80000000. */
20783 case LT:
20784 case GE:
20785 if (mode == DImode || op1 != const0_rtx)
20786 return false;
20787 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20788 code = (code == LT ? GEU : LTU);
20789 break;
20790 case LE:
20791 case GT:
20792 if (mode == DImode || op1 != constm1_rtx)
20793 return false;
20794 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20795 code = (code == LE ? GEU : LTU);
20796 break;
20798 default:
20799 return false;
20801 /* Swapping operands may cause a constant to appear as the first operand. */
20802 if (!nonimmediate_operand (op0, VOIDmode))
20804 if (!can_create_pseudo_p ())
20805 return false;
20806 op0 = force_reg (mode, op0);
20808 *pop = ix86_expand_compare (code, op0, op1);
20809 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20810 return true;
20813 bool
20814 ix86_expand_int_movcc (rtx operands[])
20816 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20817 rtx_insn *compare_seq;
20818 rtx compare_op;
20819 machine_mode mode = GET_MODE (operands[0]);
20820 bool sign_bit_compare_p = false;
20821 rtx op0 = XEXP (operands[1], 0);
20822 rtx op1 = XEXP (operands[1], 1);
20824 if (GET_MODE (op0) == TImode
20825 || (GET_MODE (op0) == DImode
20826 && !TARGET_64BIT))
20827 return false;
20829 start_sequence ();
20830 compare_op = ix86_expand_compare (code, op0, op1);
20831 compare_seq = get_insns ();
20832 end_sequence ();
20834 compare_code = GET_CODE (compare_op);
20836 if ((op1 == const0_rtx && (code == GE || code == LT))
20837 || (op1 == constm1_rtx && (code == GT || code == LE)))
20838 sign_bit_compare_p = true;
20840 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20841 HImode insns, we'd be swallowed in word prefix ops. */
20843 if ((mode != HImode || TARGET_FAST_PREFIX)
20844 && (mode != (TARGET_64BIT ? TImode : DImode))
20845 && CONST_INT_P (operands[2])
20846 && CONST_INT_P (operands[3]))
20848 rtx out = operands[0];
20849 HOST_WIDE_INT ct = INTVAL (operands[2]);
20850 HOST_WIDE_INT cf = INTVAL (operands[3]);
20851 HOST_WIDE_INT diff;
20853 diff = ct - cf;
20854 /* Sign bit compares are better done using shifts than by using
20855 sbb. */
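/* For example, dest = (x < 0 ? -1 : 0) typically reduces to a single
   arithmetic right shift of x by the sign bit position (emit_store_flag
   below); the add/ior/not/and steps that follow fold other CT/CF
   constants on top of that 0/-1 value.  */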
20856 if (sign_bit_compare_p
20857 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20859 /* Detect overlap between destination and compare sources. */
20860 rtx tmp = out;
20862 if (!sign_bit_compare_p)
20864 rtx flags;
20865 bool fpcmp = false;
20867 compare_code = GET_CODE (compare_op);
20869 flags = XEXP (compare_op, 0);
20871 if (GET_MODE (flags) == CCFPmode
20872 || GET_MODE (flags) == CCFPUmode)
20874 fpcmp = true;
20875 compare_code
20876 = ix86_fp_compare_code_to_integer (compare_code);
20879 /* To simplify the rest of the code, restrict to the GEU case. */
20880 if (compare_code == LTU)
20882 std::swap (ct, cf);
20883 compare_code = reverse_condition (compare_code);
20884 code = reverse_condition (code);
20886 else
20888 if (fpcmp)
20889 PUT_CODE (compare_op,
20890 reverse_condition_maybe_unordered
20891 (GET_CODE (compare_op)));
20892 else
20893 PUT_CODE (compare_op,
20894 reverse_condition (GET_CODE (compare_op)));
20896 diff = ct - cf;
20898 if (reg_overlap_mentioned_p (out, op0)
20899 || reg_overlap_mentioned_p (out, op1))
20900 tmp = gen_reg_rtx (mode);
20902 if (mode == DImode)
20903 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20904 else
20905 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20906 flags, compare_op));
20908 else
20910 if (code == GT || code == GE)
20911 code = reverse_condition (code);
20912 else
20914 std::swap (ct, cf);
20915 diff = ct - cf;
20917 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20920 if (diff == 1)
20923 * cmpl op0,op1
20924 * sbbl dest,dest
20925 * [addl dest, ct]
20927 * Size 5 - 8.
20929 if (ct)
20930 tmp = expand_simple_binop (mode, PLUS,
20931 tmp, GEN_INT (ct),
20932 copy_rtx (tmp), 1, OPTAB_DIRECT);
20934 else if (cf == -1)
20937 * cmpl op0,op1
20938 * sbbl dest,dest
20939 * orl $ct, dest
20941 * Size 8.
20943 tmp = expand_simple_binop (mode, IOR,
20944 tmp, GEN_INT (ct),
20945 copy_rtx (tmp), 1, OPTAB_DIRECT);
20947 else if (diff == -1 && ct)
20950 * cmpl op0,op1
20951 * sbbl dest,dest
20952 * notl dest
20953 * [addl dest, cf]
20955 * Size 8 - 11.
20957 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20958 if (cf)
20959 tmp = expand_simple_binop (mode, PLUS,
20960 copy_rtx (tmp), GEN_INT (cf),
20961 copy_rtx (tmp), 1, OPTAB_DIRECT);
20963 else
20966 * cmpl op0,op1
20967 * sbbl dest,dest
20968 * [notl dest]
20969 * andl cf - ct, dest
20970 * [addl dest, ct]
20972 * Size 8 - 11.
20975 if (cf == 0)
20977 cf = ct;
20978 ct = 0;
20979 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20982 tmp = expand_simple_binop (mode, AND,
20983 copy_rtx (tmp),
20984 gen_int_mode (cf - ct, mode),
20985 copy_rtx (tmp), 1, OPTAB_DIRECT);
20986 if (ct)
20987 tmp = expand_simple_binop (mode, PLUS,
20988 copy_rtx (tmp), GEN_INT (ct),
20989 copy_rtx (tmp), 1, OPTAB_DIRECT);
20992 if (!rtx_equal_p (tmp, out))
20993 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20995 return true;
20998 if (diff < 0)
21000 machine_mode cmp_mode = GET_MODE (op0);
21001 enum rtx_code new_code;
21003 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21005 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21007 /* We may be reversing an unordered compare to a normal compare, which
21008 is not valid in general (we may convert a non-trapping condition
21009 into a trapping one); however, on i386 we currently emit all
21010 comparisons unordered. */
21011 new_code = reverse_condition_maybe_unordered (code);
21013 else
21014 new_code = ix86_reverse_condition (code, cmp_mode);
21015 if (new_code != UNKNOWN)
21017 std::swap (ct, cf);
21018 diff = -diff;
21019 code = new_code;
21023 compare_code = UNKNOWN;
21024 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
21025 && CONST_INT_P (op1))
21027 if (op1 == const0_rtx
21028 && (code == LT || code == GE))
21029 compare_code = code;
21030 else if (op1 == constm1_rtx)
21032 if (code == LE)
21033 compare_code = LT;
21034 else if (code == GT)
21035 compare_code = GE;
21039 /* Optimize dest = (op0 < 0) ? -1 : cf. */
21040 if (compare_code != UNKNOWN
21041 && GET_MODE (op0) == GET_MODE (out)
21042 && (cf == -1 || ct == -1))
21044 /* If lea code below could be used, only optimize
21045 if it results in a 2 insn sequence. */
21047 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
21048 || diff == 3 || diff == 5 || diff == 9)
21049 || (compare_code == LT && ct == -1)
21050 || (compare_code == GE && cf == -1))
21053 * notl op1 (if necessary)
21054 * sarl $31, op1
21055 * orl cf, op1
21057 if (ct != -1)
21059 cf = ct;
21060 ct = -1;
21061 code = reverse_condition (code);
21064 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21066 out = expand_simple_binop (mode, IOR,
21067 out, GEN_INT (cf),
21068 out, 1, OPTAB_DIRECT);
21069 if (out != operands[0])
21070 emit_move_insn (operands[0], out);
21072 return true;
21077 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21078 || diff == 3 || diff == 5 || diff == 9)
21079 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21080 && (mode != DImode
21081 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21084 * xorl dest,dest
21085 * cmpl op1,op2
21086 * setcc dest
21087 * lea cf(dest*(ct-cf)),dest
21089 * Size 14.
21091 * This also catches the degenerate setcc-only case.
21094 rtx tmp;
21095 int nops;
21097 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
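/* Illustration of the lea form built below: with ct == 5 and cf == 2
   (diff == 3) the expression is out*2 + out + 2, which fits a single
   lea such as "leal 2(%eax,%eax,2), %edx" (register choice illustrative).  */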
21099 nops = 0;
21100 /* On x86_64 the lea instruction operates on Pmode, so we need
21101 to get the arithmetic done in the proper mode to match. */
21102 if (diff == 1)
21103 tmp = copy_rtx (out);
21104 else
21106 rtx out1;
21107 out1 = copy_rtx (out);
21108 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21109 nops++;
21110 if (diff & 1)
21112 tmp = gen_rtx_PLUS (mode, tmp, out1);
21113 nops++;
21116 if (cf != 0)
21118 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21119 nops++;
21121 if (!rtx_equal_p (tmp, out))
21123 if (nops == 1)
21124 out = force_operand (tmp, copy_rtx (out));
21125 else
21126 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
21128 if (!rtx_equal_p (out, operands[0]))
21129 emit_move_insn (operands[0], copy_rtx (out));
21131 return true;
21135 * General case: Jumpful:
21136 * xorl dest,dest cmpl op1, op2
21137 * cmpl op1, op2 movl ct, dest
21138 * setcc dest jcc 1f
21139 * decl dest movl cf, dest
21140 * andl (cf-ct),dest 1:
21141 * addl ct,dest
21143 * Size 20. Size 14.
21145 * This is reasonably steep, but branch mispredict costs are
21146 * high on modern CPUs, so consider failing only if optimizing
21147 * for space.
21150 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21151 && BRANCH_COST (optimize_insn_for_speed_p (),
21152 false) >= 2)
21154 if (cf == 0)
21156 machine_mode cmp_mode = GET_MODE (op0);
21157 enum rtx_code new_code;
21159 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21161 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21163 /* We may be reversing an unordered compare to a normal compare,
21164 which is not valid in general (we may convert a non-trapping
21165 condition into a trapping one); however, on i386 we currently
21166 emit all comparisons unordered. */
21167 new_code = reverse_condition_maybe_unordered (code);
21169 else
21171 new_code = ix86_reverse_condition (code, cmp_mode);
21172 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21173 compare_code = reverse_condition (compare_code);
21176 if (new_code != UNKNOWN)
21178 cf = ct;
21179 ct = 0;
21180 code = new_code;
21184 if (compare_code != UNKNOWN)
21186 /* notl op1 (if needed)
21187 sarl $31, op1
21188 andl (cf-ct), op1
21189 addl ct, op1
21191 For x < 0 (resp. x <= -1) there will be no notl,
21192 so if possible swap the constants to get rid of the
21193 complement.
21194 True/false will be -1/0 while code below (store flag
21195 followed by decrement) is 0/-1, so the constants need
21196 to be exchanged once more. */
21198 if (compare_code == GE || !cf)
21200 code = reverse_condition (code);
21201 compare_code = LT;
21203 else
21204 std::swap (ct, cf);
21206 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21208 else
21210 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21212 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21213 constm1_rtx,
21214 copy_rtx (out), 1, OPTAB_DIRECT);
21217 out = expand_simple_binop (mode, AND, copy_rtx (out),
21218 gen_int_mode (cf - ct, mode),
21219 copy_rtx (out), 1, OPTAB_DIRECT);
21220 if (ct)
21221 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21222 copy_rtx (out), 1, OPTAB_DIRECT);
21223 if (!rtx_equal_p (out, operands[0]))
21224 emit_move_insn (operands[0], copy_rtx (out));
21226 return true;
21230 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21232 /* Try a few more things with specific constants and a variable. */
21234 optab op;
21235 rtx var, orig_out, out, tmp;
21237 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21238 return false;
21240 /* If one of the two operands is an interesting constant, load a
21241 constant with the above and mask it in with a logical operation. */
21243 if (CONST_INT_P (operands[2]))
21245 var = operands[3];
21246 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21247 operands[3] = constm1_rtx, op = and_optab;
21248 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21249 operands[3] = const0_rtx, op = ior_optab;
21250 else
21251 return false;
21253 else if (CONST_INT_P (operands[3]))
21255 var = operands[2];
21256 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21257 operands[2] = constm1_rtx, op = and_optab;
21258 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21259 operands[2] = const0_rtx, op = ior_optab;
21260 else
21261 return false;
21263 else
21264 return false;
21266 orig_out = operands[0];
21267 tmp = gen_reg_rtx (mode);
21268 operands[0] = tmp;
21270 /* Recurse to get the constant loaded. */
21271 if (ix86_expand_int_movcc (operands) == 0)
21272 return false;
21274 /* Mask in the interesting variable. */
21275 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21276 OPTAB_WIDEN);
21277 if (!rtx_equal_p (out, orig_out))
21278 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21280 return true;
21284 * For comparison with above,
21286 * movl cf,dest
21287 * movl ct,tmp
21288 * cmpl op1,op2
21289 * cmovcc tmp,dest
21291 * Size 15.
21294 if (! nonimmediate_operand (operands[2], mode))
21295 operands[2] = force_reg (mode, operands[2]);
21296 if (! nonimmediate_operand (operands[3], mode))
21297 operands[3] = force_reg (mode, operands[3]);
21299 if (! register_operand (operands[2], VOIDmode)
21300 && (mode == QImode
21301 || ! register_operand (operands[3], VOIDmode)))
21302 operands[2] = force_reg (mode, operands[2]);
21304 if (mode == QImode
21305 && ! register_operand (operands[3], VOIDmode))
21306 operands[3] = force_reg (mode, operands[3]);
21308 emit_insn (compare_seq);
21309 emit_insn (gen_rtx_SET (operands[0],
21310 gen_rtx_IF_THEN_ELSE (mode,
21311 compare_op, operands[2],
21312 operands[3])));
21313 return true;
21316 /* Swap, force into registers, or otherwise massage the two operands
21317 to an sse comparison with a mask result. Thus we differ a bit from
21318 ix86_prepare_fp_compare_args which expects to produce a flags result.
21320 The DEST operand exists to help determine whether to commute commutative
21321 operators. The POP0/POP1 operands are updated in place. The new
21322 comparison code is returned, or UNKNOWN if not implementable. */
21324 static enum rtx_code
21325 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21326 rtx *pop0, rtx *pop1)
21328 switch (code)
21330 case LTGT:
21331 case UNEQ:
21332 /* AVX supports all the needed comparisons. */
21333 if (TARGET_AVX)
21334 break;
21335 /* We have no LTGT as an operator. We could implement it with
21336 NE & ORDERED, but this requires an extra temporary. It's
21337 not clear that it's worth it. */
21338 return UNKNOWN;
21340 case LT:
21341 case LE:
21342 case UNGT:
21343 case UNGE:
21344 /* These are supported directly. */
21345 break;
21347 case EQ:
21348 case NE:
21349 case UNORDERED:
21350 case ORDERED:
21351 /* AVX has 3 operand comparisons, no need to swap anything. */
21352 if (TARGET_AVX)
21353 break;
21354 /* For commutative operators, try to canonicalize the destination
21355 operand to be first in the comparison - this helps reload to
21356 avoid extra moves. */
21357 if (!dest || !rtx_equal_p (dest, *pop1))
21358 break;
21359 /* FALLTHRU */
21361 case GE:
21362 case GT:
21363 case UNLE:
21364 case UNLT:
21365 /* These are not supported directly before AVX, and furthermore
21366 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21367 comparison operands to transform into something that is
21368 supported. */
21369 std::swap (*pop0, *pop1);
21370 code = swap_condition (code);
21371 break;
21373 default:
21374 gcc_unreachable ();
21377 return code;
21380 /* Detect conditional moves that exactly match min/max operational
21381 semantics. Note that this is IEEE safe, as long as we don't
21382 interchange the operands.
21384 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21385 and TRUE if the operation is successful and instructions are emitted. */
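/* For example, with an LT compare:
   a < b ? a : b maps to min (cmp_op0 == if_true, cmp_op1 == if_false),
   a < b ? b : a maps to max (operands matched the other way around);
   an UNGE compare is handled by first swapping if_true and if_false.  */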
21387 static bool
21388 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21389 rtx cmp_op1, rtx if_true, rtx if_false)
21391 machine_mode mode;
21392 bool is_min;
21393 rtx tmp;
21395 if (code == LT)
21397 else if (code == UNGE)
21398 std::swap (if_true, if_false);
21399 else
21400 return false;
21402 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21403 is_min = true;
21404 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21405 is_min = false;
21406 else
21407 return false;
21409 mode = GET_MODE (dest);
21411 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21412 but MODE may be a vector mode and thus not appropriate. */
21413 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21415 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21416 rtvec v;
21418 if_true = force_reg (mode, if_true);
21419 v = gen_rtvec (2, if_true, if_false);
21420 tmp = gen_rtx_UNSPEC (mode, v, u);
21422 else
21424 code = is_min ? SMIN : SMAX;
21425 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21428 emit_insn (gen_rtx_SET (dest, tmp));
21429 return true;
21432 /* Expand an sse vector comparison. Return the register with the result. */
21434 static rtx
21435 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21436 rtx op_true, rtx op_false)
21438 machine_mode mode = GET_MODE (dest);
21439 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21441 /* In the general case the result of the comparison can differ from the operands' type. */
21442 machine_mode cmp_mode;
21444 /* In AVX512F the result of comparison is an integer mask. */
21445 bool maskcmp = false;
21446 rtx x;
21448 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21450 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21451 gcc_assert (cmp_mode != BLKmode);
21453 maskcmp = true;
21455 else
21456 cmp_mode = cmp_ops_mode;
21459 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21460 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21461 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21463 if (optimize
21464 || reg_overlap_mentioned_p (dest, op_true)
21465 || reg_overlap_mentioned_p (dest, op_false))
21466 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21468 /* Compare patterns for int modes are unspec in AVX512F only. */
21469 if (maskcmp && (code == GT || code == EQ))
21471 rtx (*gen)(rtx, rtx, rtx);
21473 switch (cmp_ops_mode)
21475 case V64QImode:
21476 gcc_assert (TARGET_AVX512BW);
21477 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21478 break;
21479 case V32HImode:
21480 gcc_assert (TARGET_AVX512BW);
21481 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21482 break;
21483 case V16SImode:
21484 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21485 break;
21486 case V8DImode:
21487 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21488 break;
21489 default:
21490 gen = NULL;
21493 if (gen)
21495 emit_insn (gen (dest, cmp_op0, cmp_op1));
21496 return dest;
21499 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21501 if (cmp_mode != mode && !maskcmp)
21503 x = force_reg (cmp_ops_mode, x);
21504 convert_move (dest, x, false);
21506 else
21507 emit_insn (gen_rtx_SET (dest, x));
21509 return dest;
21512 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21513 operations. This is used for both scalar and vector conditional moves. */
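/* In the most general case this computes
   dest = (cmp & op_true) | (~cmp & op_false),
   with cheaper single-operation forms used below when one of the arms
   is all zeros or all ones, and blend/vpcmov instructions when available.  */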
21515 static void
21516 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21518 machine_mode mode = GET_MODE (dest);
21519 machine_mode cmpmode = GET_MODE (cmp);
21521 /* In AVX512F the result of comparison is an integer mask. */
21522 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21524 rtx t2, t3, x;
21526 if (vector_all_ones_operand (op_true, mode)
21527 && rtx_equal_p (op_false, CONST0_RTX (mode))
21528 && !maskcmp)
21530 emit_insn (gen_rtx_SET (dest, cmp));
21532 else if (op_false == CONST0_RTX (mode)
21533 && !maskcmp)
21535 op_true = force_reg (mode, op_true);
21536 x = gen_rtx_AND (mode, cmp, op_true);
21537 emit_insn (gen_rtx_SET (dest, x));
21539 else if (op_true == CONST0_RTX (mode)
21540 && !maskcmp)
21542 op_false = force_reg (mode, op_false);
21543 x = gen_rtx_NOT (mode, cmp);
21544 x = gen_rtx_AND (mode, x, op_false);
21545 emit_insn (gen_rtx_SET (dest, x));
21547 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21548 && !maskcmp)
21550 op_false = force_reg (mode, op_false);
21551 x = gen_rtx_IOR (mode, cmp, op_false);
21552 emit_insn (gen_rtx_SET (dest, x));
21554 else if (TARGET_XOP
21555 && !maskcmp)
21557 op_true = force_reg (mode, op_true);
21559 if (!nonimmediate_operand (op_false, mode))
21560 op_false = force_reg (mode, op_false);
21562 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
21563 op_true,
21564 op_false)));
21566 else
21568 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21569 rtx d = dest;
21571 if (!nonimmediate_operand (op_true, mode))
21572 op_true = force_reg (mode, op_true);
21574 op_false = force_reg (mode, op_false);
21576 switch (mode)
21578 case V4SFmode:
21579 if (TARGET_SSE4_1)
21580 gen = gen_sse4_1_blendvps;
21581 break;
21582 case V2DFmode:
21583 if (TARGET_SSE4_1)
21584 gen = gen_sse4_1_blendvpd;
21585 break;
21586 case V16QImode:
21587 case V8HImode:
21588 case V4SImode:
21589 case V2DImode:
21590 if (TARGET_SSE4_1)
21592 gen = gen_sse4_1_pblendvb;
21593 if (mode != V16QImode)
21594 d = gen_reg_rtx (V16QImode);
21595 op_false = gen_lowpart (V16QImode, op_false);
21596 op_true = gen_lowpart (V16QImode, op_true);
21597 cmp = gen_lowpart (V16QImode, cmp);
21599 break;
21600 case V8SFmode:
21601 if (TARGET_AVX)
21602 gen = gen_avx_blendvps256;
21603 break;
21604 case V4DFmode:
21605 if (TARGET_AVX)
21606 gen = gen_avx_blendvpd256;
21607 break;
21608 case V32QImode:
21609 case V16HImode:
21610 case V8SImode:
21611 case V4DImode:
21612 if (TARGET_AVX2)
21614 gen = gen_avx2_pblendvb;
21615 if (mode != V32QImode)
21616 d = gen_reg_rtx (V32QImode);
21617 op_false = gen_lowpart (V32QImode, op_false);
21618 op_true = gen_lowpart (V32QImode, op_true);
21619 cmp = gen_lowpart (V32QImode, cmp);
21621 break;
21623 case V64QImode:
21624 gen = gen_avx512bw_blendmv64qi;
21625 break;
21626 case V32HImode:
21627 gen = gen_avx512bw_blendmv32hi;
21628 break;
21629 case V16SImode:
21630 gen = gen_avx512f_blendmv16si;
21631 break;
21632 case V8DImode:
21633 gen = gen_avx512f_blendmv8di;
21634 break;
21635 case V8DFmode:
21636 gen = gen_avx512f_blendmv8df;
21637 break;
21638 case V16SFmode:
21639 gen = gen_avx512f_blendmv16sf;
21640 break;
21642 default:
21643 break;
21646 if (gen != NULL)
21648 emit_insn (gen (d, op_false, op_true, cmp));
21649 if (d != dest)
21650 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21652 else
21654 op_true = force_reg (mode, op_true);
21656 t2 = gen_reg_rtx (mode);
21657 if (optimize)
21658 t3 = gen_reg_rtx (mode);
21659 else
21660 t3 = dest;
21662 x = gen_rtx_AND (mode, op_true, cmp);
21663 emit_insn (gen_rtx_SET (t2, x));
21665 x = gen_rtx_NOT (mode, cmp);
21666 x = gen_rtx_AND (mode, x, op_false);
21667 emit_insn (gen_rtx_SET (t3, x));
21669 x = gen_rtx_IOR (mode, t3, t2);
21670 emit_insn (gen_rtx_SET (dest, x));
21675 /* Expand a floating-point conditional move. Return true if successful. */
21677 bool
21678 ix86_expand_fp_movcc (rtx operands[])
21680 machine_mode mode = GET_MODE (operands[0]);
21681 enum rtx_code code = GET_CODE (operands[1]);
21682 rtx tmp, compare_op;
21683 rtx op0 = XEXP (operands[1], 0);
21684 rtx op1 = XEXP (operands[1], 1);
21686 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21688 machine_mode cmode;
21690 /* Since we have no cmove for SSE registers, don't force bad register
21691 allocation just to gain access to it. Deny movcc when the
21692 comparison mode doesn't match the move mode. */
21693 cmode = GET_MODE (op0);
21694 if (cmode == VOIDmode)
21695 cmode = GET_MODE (op1);
21696 if (cmode != mode)
21697 return false;
21699 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21700 if (code == UNKNOWN)
21701 return false;
21703 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21704 operands[2], operands[3]))
21705 return true;
21707 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21708 operands[2], operands[3]);
21709 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21710 return true;
21713 if (GET_MODE (op0) == TImode
21714 || (GET_MODE (op0) == DImode
21715 && !TARGET_64BIT))
21716 return false;
21718 /* The floating point conditional move instructions don't directly
21719 support conditions resulting from a signed integer comparison. */
21721 compare_op = ix86_expand_compare (code, op0, op1);
21722 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21724 tmp = gen_reg_rtx (QImode);
21725 ix86_expand_setcc (tmp, code, op0, op1);
21727 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21730 emit_insn (gen_rtx_SET (operands[0],
21731 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21732 operands[2], operands[3])));
21734 return true;
21737 /* Expand a floating-point vector conditional move; a vcond operation
21738 rather than a movcc operation. */
21740 bool
21741 ix86_expand_fp_vcond (rtx operands[])
21743 enum rtx_code code = GET_CODE (operands[3]);
21744 rtx cmp;
21746 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21747 &operands[4], &operands[5]);
21748 if (code == UNKNOWN)
21750 rtx temp;
21751 switch (GET_CODE (operands[3]))
21753 case LTGT:
21754 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21755 operands[5], operands[0], operands[0]);
21756 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21757 operands[5], operands[1], operands[2]);
21758 code = AND;
21759 break;
21760 case UNEQ:
21761 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21762 operands[5], operands[0], operands[0]);
21763 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21764 operands[5], operands[1], operands[2]);
21765 code = IOR;
21766 break;
21767 default:
21768 gcc_unreachable ();
21770 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21771 OPTAB_DIRECT);
21772 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21773 return true;
21776 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21777 operands[5], operands[1], operands[2]))
21778 return true;
21780 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21781 operands[1], operands[2]);
21782 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21783 return true;
21786 /* Expand a signed/unsigned integral vector conditional move. */
21788 bool
21789 ix86_expand_int_vcond (rtx operands[])
21791 machine_mode data_mode = GET_MODE (operands[0]);
21792 machine_mode mode = GET_MODE (operands[4]);
21793 enum rtx_code code = GET_CODE (operands[3]);
21794 bool negate = false;
21795 rtx x, cop0, cop1;
21797 cop0 = operands[4];
21798 cop1 = operands[5];
21800 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21801 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21802 if ((code == LT || code == GE)
21803 && data_mode == mode
21804 && cop1 == CONST0_RTX (mode)
21805 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21806 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21807 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21808 && (GET_MODE_SIZE (data_mode) == 16
21809 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21811 rtx negop = operands[2 - (code == LT)];
21812 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21813 if (negop == CONST1_RTX (data_mode))
21815 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21816 operands[0], 1, OPTAB_DIRECT);
21817 if (res != operands[0])
21818 emit_move_insn (operands[0], res);
21819 return true;
21821 else if (GET_MODE_INNER (data_mode) != DImode
21822 && vector_all_ones_operand (negop, data_mode))
21824 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21825 operands[0], 0, OPTAB_DIRECT);
21826 if (res != operands[0])
21827 emit_move_insn (operands[0], res);
21828 return true;
21832 if (!nonimmediate_operand (cop1, mode))
21833 cop1 = force_reg (mode, cop1);
21834 if (!general_operand (operands[1], data_mode))
21835 operands[1] = force_reg (data_mode, operands[1]);
21836 if (!general_operand (operands[2], data_mode))
21837 operands[2] = force_reg (data_mode, operands[2]);
21839 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21840 if (TARGET_XOP
21841 && (mode == V16QImode || mode == V8HImode
21842 || mode == V4SImode || mode == V2DImode))
21844 else
21846 /* Canonicalize the comparison to EQ, GT, GTU. */
21847 switch (code)
21849 case EQ:
21850 case GT:
21851 case GTU:
21852 break;
21854 case NE:
21855 case LE:
21856 case LEU:
21857 code = reverse_condition (code);
21858 negate = true;
21859 break;
21861 case GE:
21862 case GEU:
21863 code = reverse_condition (code);
21864 negate = true;
21865 /* FALLTHRU */
21867 case LT:
21868 case LTU:
21869 std::swap (cop0, cop1);
21870 code = swap_condition (code);
21871 break;
21873 default:
21874 gcc_unreachable ();
21877 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21878 if (mode == V2DImode)
21880 switch (code)
21882 case EQ:
21883 /* SSE4.1 supports EQ. */
21884 if (!TARGET_SSE4_1)
21885 return false;
21886 break;
21888 case GT:
21889 case GTU:
21890 /* SSE4.2 supports GT/GTU. */
21891 if (!TARGET_SSE4_2)
21892 return false;
21893 break;
21895 default:
21896 gcc_unreachable ();
21900 /* Unsigned parallel compare is not supported by the hardware.
21901 Play some tricks to turn this into a signed comparison
21902 against 0. */
21903 if (code == GTU)
21905 cop0 = force_reg (mode, cop0);
21907 switch (mode)
21909 case V16SImode:
21910 case V8DImode:
21911 case V8SImode:
21912 case V4DImode:
21913 case V4SImode:
21914 case V2DImode:
21916 rtx t1, t2, mask;
21917 rtx (*gen_sub3) (rtx, rtx, rtx);
21919 switch (mode)
21921 case V16SImode: gen_sub3 = gen_subv16si3; break;
21922 case V8DImode: gen_sub3 = gen_subv8di3; break;
21923 case V8SImode: gen_sub3 = gen_subv8si3; break;
21924 case V4DImode: gen_sub3 = gen_subv4di3; break;
21925 case V4SImode: gen_sub3 = gen_subv4si3; break;
21926 case V2DImode: gen_sub3 = gen_subv2di3; break;
21927 default:
21928 gcc_unreachable ();
21930 /* Subtract (-(INT MAX) - 1) from both operands to make
21931 them signed. */
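/* For example, in V4SImode with elements a = 0xffffffff and b = 0x00000001
   we have a >u b; after subtracting the 0x80000000 bias the elements become
   0x7fffffff and 0x80000001, and the signed GT comparison below yields the
   same (true) result.  */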
21932 mask = ix86_build_signbit_mask (mode, true, false);
21933 t1 = gen_reg_rtx (mode);
21934 emit_insn (gen_sub3 (t1, cop0, mask));
21936 t2 = gen_reg_rtx (mode);
21937 emit_insn (gen_sub3 (t2, cop1, mask));
21939 cop0 = t1;
21940 cop1 = t2;
21941 code = GT;
21943 break;
21945 case V64QImode:
21946 case V32HImode:
21947 case V32QImode:
21948 case V16HImode:
21949 case V16QImode:
21950 case V8HImode:
21951 /* Perform a parallel unsigned saturating subtraction. */
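/* a >u b is equivalent to (a -us b) != 0, where -us saturates at zero,
   so the GTU test becomes an EQ test of the saturated difference against
   zero whose result is then inverted (hence negate = !negate).  */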
21952 x = gen_reg_rtx (mode);
21953 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
21955 cop0 = x;
21956 cop1 = CONST0_RTX (mode);
21957 code = EQ;
21958 negate = !negate;
21959 break;
21961 default:
21962 gcc_unreachable ();
21967 /* Allow the comparison to be done in one mode, but the movcc to
21968 happen in another mode. */
21969 if (data_mode == mode)
21971 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21972 operands[1+negate], operands[2-negate]);
21974 else
21976 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21977 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21978 operands[1+negate], operands[2-negate]);
21979 if (GET_MODE (x) == mode)
21980 x = gen_lowpart (data_mode, x);
21983 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21984 operands[2-negate]);
21985 return true;
21988 /* AVX512F does support 64-byte integer vector operations,
21989 thus the longest vector we are faced with is V64QImode. */
21990 #define MAX_VECT_LEN 64
21992 struct expand_vec_perm_d
21994 rtx target, op0, op1;
21995 unsigned char perm[MAX_VECT_LEN];
21996 machine_mode vmode;
21997 unsigned char nelt;
21998 bool one_operand_p;
21999 bool testing_p;
22002 static bool
22003 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
22004 struct expand_vec_perm_d *d)
22006 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22007 expander, so args are either in d, or in op0, op1 etc. */
22008 machine_mode mode = GET_MODE (d ? d->op0 : op0);
22009 machine_mode maskmode = mode;
22010 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22012 switch (mode)
22014 case V8HImode:
22015 if (TARGET_AVX512VL && TARGET_AVX512BW)
22016 gen = gen_avx512vl_vpermi2varv8hi3;
22017 break;
22018 case V16HImode:
22019 if (TARGET_AVX512VL && TARGET_AVX512BW)
22020 gen = gen_avx512vl_vpermi2varv16hi3;
22021 break;
22022 case V64QImode:
22023 if (TARGET_AVX512VBMI)
22024 gen = gen_avx512bw_vpermi2varv64qi3;
22025 break;
22026 case V32HImode:
22027 if (TARGET_AVX512BW)
22028 gen = gen_avx512bw_vpermi2varv32hi3;
22029 break;
22030 case V4SImode:
22031 if (TARGET_AVX512VL)
22032 gen = gen_avx512vl_vpermi2varv4si3;
22033 break;
22034 case V8SImode:
22035 if (TARGET_AVX512VL)
22036 gen = gen_avx512vl_vpermi2varv8si3;
22037 break;
22038 case V16SImode:
22039 if (TARGET_AVX512F)
22040 gen = gen_avx512f_vpermi2varv16si3;
22041 break;
22042 case V4SFmode:
22043 if (TARGET_AVX512VL)
22045 gen = gen_avx512vl_vpermi2varv4sf3;
22046 maskmode = V4SImode;
22048 break;
22049 case V8SFmode:
22050 if (TARGET_AVX512VL)
22052 gen = gen_avx512vl_vpermi2varv8sf3;
22053 maskmode = V8SImode;
22055 break;
22056 case V16SFmode:
22057 if (TARGET_AVX512F)
22059 gen = gen_avx512f_vpermi2varv16sf3;
22060 maskmode = V16SImode;
22062 break;
22063 case V2DImode:
22064 if (TARGET_AVX512VL)
22065 gen = gen_avx512vl_vpermi2varv2di3;
22066 break;
22067 case V4DImode:
22068 if (TARGET_AVX512VL)
22069 gen = gen_avx512vl_vpermi2varv4di3;
22070 break;
22071 case V8DImode:
22072 if (TARGET_AVX512F)
22073 gen = gen_avx512f_vpermi2varv8di3;
22074 break;
22075 case V2DFmode:
22076 if (TARGET_AVX512VL)
22078 gen = gen_avx512vl_vpermi2varv2df3;
22079 maskmode = V2DImode;
22081 break;
22082 case V4DFmode:
22083 if (TARGET_AVX512VL)
22085 gen = gen_avx512vl_vpermi2varv4df3;
22086 maskmode = V4DImode;
22088 break;
22089 case V8DFmode:
22090 if (TARGET_AVX512F)
22092 gen = gen_avx512f_vpermi2varv8df3;
22093 maskmode = V8DImode;
22095 break;
22096 default:
22097 break;
22100 if (gen == NULL)
22101 return false;
22103 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22104 expander, so args are either in d, or in op0, op1 etc. */
22105 if (d)
22107 rtx vec[64];
22108 target = d->target;
22109 op0 = d->op0;
22110 op1 = d->op1;
22111 for (int i = 0; i < d->nelt; ++i)
22112 vec[i] = GEN_INT (d->perm[i]);
22113 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22116 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22117 return true;
22120 /* Expand a variable vector permutation. */
22122 void
22123 ix86_expand_vec_perm (rtx operands[])
22125 rtx target = operands[0];
22126 rtx op0 = operands[1];
22127 rtx op1 = operands[2];
22128 rtx mask = operands[3];
22129 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22130 machine_mode mode = GET_MODE (op0);
22131 machine_mode maskmode = GET_MODE (mask);
22132 int w, e, i;
22133 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22135 /* Number of elements in the vector. */
22136 w = GET_MODE_NUNITS (mode);
22137 e = GET_MODE_UNIT_SIZE (mode);
22138 gcc_assert (w <= 64);
22140 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22141 return;
22143 if (TARGET_AVX2)
22145 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22147 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22148 a constant shuffle operand. With a tiny bit of effort we can
22149 use VPERMD instead. A re-interpretation stall for V4DFmode is
22150 unfortunate but there's no avoiding it.
22151 Similarly, for V16HImode we don't have instructions for variable
22152 shuffling, while for V32QImode we can, after preparing suitable
22153 masks, use vpshufb; vpshufb; vpermq; vpor. */
22155 if (mode == V16HImode)
22157 maskmode = mode = V32QImode;
22158 w = 32;
22159 e = 1;
22161 else
22163 maskmode = mode = V8SImode;
22164 w = 8;
22165 e = 4;
22167 t1 = gen_reg_rtx (maskmode);
22169 /* Replicate the low bits of the V4DImode mask into V8SImode:
22170 mask = { A B C D }
22171 t1 = { A A B B C C D D }. */
22172 for (i = 0; i < w / 2; ++i)
22173 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22174 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22175 vt = force_reg (maskmode, vt);
22176 mask = gen_lowpart (maskmode, mask);
22177 if (maskmode == V8SImode)
22178 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22179 else
22180 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22182 /* Multiply the shuffle indices by two. */
22183 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22184 OPTAB_DIRECT);
22186 /* Add one to the odd shuffle indices:
22187 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22188 for (i = 0; i < w / 2; ++i)
22190 vec[i * 2] = const0_rtx;
22191 vec[i * 2 + 1] = const1_rtx;
22193 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22194 vt = validize_mem (force_const_mem (maskmode, vt));
22195 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22196 OPTAB_DIRECT);
22198 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22199 operands[3] = mask = t1;
22200 target = gen_reg_rtx (mode);
22201 op0 = gen_lowpart (mode, op0);
22202 op1 = gen_lowpart (mode, op1);
22205 switch (mode)
22207 case V8SImode:
22208 /* The VPERMD and VPERMPS instructions already properly ignore
22209 the high bits of the shuffle elements. No need for us to
22210 perform an AND ourselves. */
22211 if (one_operand_shuffle)
22213 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22214 if (target != operands[0])
22215 emit_move_insn (operands[0],
22216 gen_lowpart (GET_MODE (operands[0]), target));
22218 else
22220 t1 = gen_reg_rtx (V8SImode);
22221 t2 = gen_reg_rtx (V8SImode);
22222 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22223 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22224 goto merge_two;
22226 return;
22228 case V8SFmode:
22229 mask = gen_lowpart (V8SImode, mask);
22230 if (one_operand_shuffle)
22231 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22232 else
22234 t1 = gen_reg_rtx (V8SFmode);
22235 t2 = gen_reg_rtx (V8SFmode);
22236 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22237 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22238 goto merge_two;
22240 return;
22242 case V4SImode:
22243 /* By combining the two 128-bit input vectors into one 256-bit
22244 input vector, we can use VPERMD and VPERMPS for the full
22245 two-operand shuffle. */
22246 t1 = gen_reg_rtx (V8SImode);
22247 t2 = gen_reg_rtx (V8SImode);
22248 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22249 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22250 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22251 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22252 return;
22254 case V4SFmode:
22255 t1 = gen_reg_rtx (V8SFmode);
22256 t2 = gen_reg_rtx (V8SImode);
22257 mask = gen_lowpart (V4SImode, mask);
22258 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22259 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22260 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22261 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22262 return;
22264 case V32QImode:
22265 t1 = gen_reg_rtx (V32QImode);
22266 t2 = gen_reg_rtx (V32QImode);
22267 t3 = gen_reg_rtx (V32QImode);
22268 vt2 = GEN_INT (-128);
22269 for (i = 0; i < 32; i++)
22270 vec[i] = vt2;
22271 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22272 vt = force_reg (V32QImode, vt);
22273 for (i = 0; i < 32; i++)
22274 vec[i] = i < 16 ? vt2 : const0_rtx;
22275 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22276 vt2 = force_reg (V32QImode, vt2);
22277 /* From mask create two adjusted masks, which contain the same
22278 bits as mask in the low 7 bits of each vector element.
22279 The first mask will have the most significant bit clear
22280 if it requests element from the same 128-bit lane
22281 and MSB set if it requests element from the other 128-bit lane.
22282 The second mask will have the opposite values of the MSB,
22283 and additionally will have its 128-bit lanes swapped.
22284 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22285 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22286 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22287 stands for other 12 bytes. */
22288 /* The bit that says whether an element is from the same lane or the
22289 other lane is bit 4, so shift it up by 3 to the MSB position. */
22290 t5 = gen_reg_rtx (V4DImode);
22291 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22292 GEN_INT (3)));
22293 /* Clear MSB bits from the mask just in case it had them set. */
22294 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22295 /* After this t1 will have MSB set for elements from other lane. */
22296 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22297 /* Clear bits other than MSB. */
22298 emit_insn (gen_andv32qi3 (t1, t1, vt));
22299 /* Or in the lower bits from mask into t3. */
22300 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22301 /* And invert MSB bits in t1, so MSB is set for elements from the same
22302 lane. */
22303 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22304 /* Swap 128-bit lanes in t3. */
22305 t6 = gen_reg_rtx (V4DImode);
22306 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22307 const2_rtx, GEN_INT (3),
22308 const0_rtx, const1_rtx));
22309 /* And or in the lower bits from mask into t1. */
22310 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22311 if (one_operand_shuffle)
22313 /* Each of these shuffles will put 0s in places where an
22314 element from the other 128-bit lane is needed; otherwise
22315 it will shuffle in the requested value. */
22316 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22317 gen_lowpart (V32QImode, t6)));
22318 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22319 /* For t3 the 128-bit lanes are swapped again. */
22320 t7 = gen_reg_rtx (V4DImode);
22321 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22322 const2_rtx, GEN_INT (3),
22323 const0_rtx, const1_rtx));
22324 /* And oring both together leads to the result. */
22325 emit_insn (gen_iorv32qi3 (target, t1,
22326 gen_lowpart (V32QImode, t7)));
22327 if (target != operands[0])
22328 emit_move_insn (operands[0],
22329 gen_lowpart (GET_MODE (operands[0]), target));
22330 return;
22333 t4 = gen_reg_rtx (V32QImode);
22334 /* Similar to the one_operand_shuffle code above,
22335 just repeated twice, once for each operand. The merge_two:
22336 code will merge the two results together. */
22337 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22338 gen_lowpart (V32QImode, t6)));
22339 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22340 gen_lowpart (V32QImode, t6)));
22341 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22342 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22343 t7 = gen_reg_rtx (V4DImode);
22344 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22345 const2_rtx, GEN_INT (3),
22346 const0_rtx, const1_rtx));
22347 t8 = gen_reg_rtx (V4DImode);
22348 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22349 const2_rtx, GEN_INT (3),
22350 const0_rtx, const1_rtx));
22351 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22352 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22353 t1 = t4;
22354 t2 = t3;
22355 goto merge_two;
22357 default:
22358 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22359 break;
22363 if (TARGET_XOP)
22365 /* The XOP VPPERM insn supports three inputs. By ignoring the
22366 one_operand_shuffle special case, we avoid creating another
22367 set of constant vectors in memory. */
22368 one_operand_shuffle = false;
22370 /* mask = mask & {2*w-1, ...} */
22371 vt = GEN_INT (2*w - 1);
22373 else
22375 /* mask = mask & {w-1, ...} */
22376 vt = GEN_INT (w - 1);
22379 for (i = 0; i < w; i++)
22380 vec[i] = vt;
22381 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22382 mask = expand_simple_binop (maskmode, AND, mask, vt,
22383 NULL_RTX, 0, OPTAB_DIRECT);
22385 /* For non-QImode operations, convert the word permutation control
22386 into a byte permutation control. */
22387 if (mode != V16QImode)
22389 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22390 GEN_INT (exact_log2 (e)),
22391 NULL_RTX, 0, OPTAB_DIRECT);
22393 /* Convert mask to vector of chars. */
22394 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22396 /* Replicate each of the input bytes into byte positions:
22397 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22398 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22399 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22400 for (i = 0; i < 16; ++i)
22401 vec[i] = GEN_INT (i/e * e);
22402 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22403 vt = validize_mem (force_const_mem (V16QImode, vt));
22404 if (TARGET_XOP)
22405 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22406 else
22407 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22409 /* Convert it into the byte positions by doing
22410 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22411 for (i = 0; i < 16; ++i)
22412 vec[i] = GEN_INT (i % e);
22413 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22414 vt = validize_mem (force_const_mem (V16QImode, vt));
22415 emit_insn (gen_addv16qi3 (mask, mask, vt));
22418 /* The actual shuffle operations all operate on V16QImode. */
22419 op0 = gen_lowpart (V16QImode, op0);
22420 op1 = gen_lowpart (V16QImode, op1);
22422 if (TARGET_XOP)
22424 if (GET_MODE (target) != V16QImode)
22425 target = gen_reg_rtx (V16QImode);
22426 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22427 if (target != operands[0])
22428 emit_move_insn (operands[0],
22429 gen_lowpart (GET_MODE (operands[0]), target));
22431 else if (one_operand_shuffle)
22433 if (GET_MODE (target) != V16QImode)
22434 target = gen_reg_rtx (V16QImode);
22435 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22436 if (target != operands[0])
22437 emit_move_insn (operands[0],
22438 gen_lowpart (GET_MODE (operands[0]), target));
22440 else
22442 rtx xops[6];
22443 bool ok;
22445 /* Shuffle the two input vectors independently. */
22446 t1 = gen_reg_rtx (V16QImode);
22447 t2 = gen_reg_rtx (V16QImode);
22448 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22449 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22451 merge_two:
22452 /* Then merge them together. The key is whether any given control
22453 element contained a bit set that indicates the second word. */
22454 mask = operands[3];
22455 vt = GEN_INT (w);
22456 if (maskmode == V2DImode && !TARGET_SSE4_1)
22458 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22459 more shuffle to convert the V2DI input mask into a V4SI
22460 input mask, at which point the masking that expand_int_vcond
22461 performs will work as desired. */
22462 rtx t3 = gen_reg_rtx (V4SImode);
22463 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22464 const0_rtx, const0_rtx,
22465 const2_rtx, const2_rtx));
22466 mask = t3;
22467 maskmode = V4SImode;
22468 e = w = 4;
22471 for (i = 0; i < w; i++)
22472 vec[i] = vt;
22473 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22474 vt = force_reg (maskmode, vt);
22475 mask = expand_simple_binop (maskmode, AND, mask, vt,
22476 NULL_RTX, 0, OPTAB_DIRECT);
22478 if (GET_MODE (target) != mode)
22479 target = gen_reg_rtx (mode);
22480 xops[0] = target;
22481 xops[1] = gen_lowpart (mode, t2);
22482 xops[2] = gen_lowpart (mode, t1);
22483 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22484 xops[4] = mask;
22485 xops[5] = vt;
22486 ok = ix86_expand_int_vcond (xops);
22487 gcc_assert (ok);
22488 if (target != operands[0])
22489 emit_move_insn (operands[0],
22490 gen_lowpart (GET_MODE (operands[0]), target));
22494 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
22495 true if we should do zero extension, else sign extension. HIGH_P is
22496 true if we want the N/2 high elements, else the low elements. */
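/* For example, a V16QImode SRC with UNSIGNED_P set is zero-extended into a
   V8HImode DEST, using either the low or the high eight bytes of SRC
   depending on HIGH_P.  */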
22498 void
22499 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22501 machine_mode imode = GET_MODE (src);
22502 rtx tmp;
22504 if (TARGET_SSE4_1)
22506 rtx (*unpack)(rtx, rtx);
22507 rtx (*extract)(rtx, rtx) = NULL;
22508 machine_mode halfmode = BLKmode;
22510 switch (imode)
22512 case V64QImode:
22513 if (unsigned_p)
22514 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22515 else
22516 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22517 halfmode = V32QImode;
22518 extract
22519 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22520 break;
22521 case V32QImode:
22522 if (unsigned_p)
22523 unpack = gen_avx2_zero_extendv16qiv16hi2;
22524 else
22525 unpack = gen_avx2_sign_extendv16qiv16hi2;
22526 halfmode = V16QImode;
22527 extract
22528 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22529 break;
22530 case V32HImode:
22531 if (unsigned_p)
22532 unpack = gen_avx512f_zero_extendv16hiv16si2;
22533 else
22534 unpack = gen_avx512f_sign_extendv16hiv16si2;
22535 halfmode = V16HImode;
22536 extract
22537 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22538 break;
22539 case V16HImode:
22540 if (unsigned_p)
22541 unpack = gen_avx2_zero_extendv8hiv8si2;
22542 else
22543 unpack = gen_avx2_sign_extendv8hiv8si2;
22544 halfmode = V8HImode;
22545 extract
22546 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22547 break;
22548 case V16SImode:
22549 if (unsigned_p)
22550 unpack = gen_avx512f_zero_extendv8siv8di2;
22551 else
22552 unpack = gen_avx512f_sign_extendv8siv8di2;
22553 halfmode = V8SImode;
22554 extract
22555 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22556 break;
22557 case V8SImode:
22558 if (unsigned_p)
22559 unpack = gen_avx2_zero_extendv4siv4di2;
22560 else
22561 unpack = gen_avx2_sign_extendv4siv4di2;
22562 halfmode = V4SImode;
22563 extract
22564 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22565 break;
22566 case V16QImode:
22567 if (unsigned_p)
22568 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22569 else
22570 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22571 break;
22572 case V8HImode:
22573 if (unsigned_p)
22574 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22575 else
22576 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22577 break;
22578 case V4SImode:
22579 if (unsigned_p)
22580 unpack = gen_sse4_1_zero_extendv2siv2di2;
22581 else
22582 unpack = gen_sse4_1_sign_extendv2siv2di2;
22583 break;
22584 default:
22585 gcc_unreachable ();
22588 if (GET_MODE_SIZE (imode) >= 32)
22590 tmp = gen_reg_rtx (halfmode);
22591 emit_insn (extract (tmp, src));
22593 else if (high_p)
22595 /* Shift higher 8 bytes to lower 8 bytes. */
22596 tmp = gen_reg_rtx (V1TImode);
22597 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22598 GEN_INT (64)));
22599 tmp = gen_lowpart (imode, tmp);
22601 else
22602 tmp = src;
22604 emit_insn (unpack (dest, tmp));
22606 else
22608 rtx (*unpack)(rtx, rtx, rtx);
22610 switch (imode)
22612 case V16QImode:
22613 if (high_p)
22614 unpack = gen_vec_interleave_highv16qi;
22615 else
22616 unpack = gen_vec_interleave_lowv16qi;
22617 break;
22618 case V8HImode:
22619 if (high_p)
22620 unpack = gen_vec_interleave_highv8hi;
22621 else
22622 unpack = gen_vec_interleave_lowv8hi;
22623 break;
22624 case V4SImode:
22625 if (high_p)
22626 unpack = gen_vec_interleave_highv4si;
22627 else
22628 unpack = gen_vec_interleave_lowv4si;
22629 break;
22630 default:
22631 gcc_unreachable ();
22634 if (unsigned_p)
22635 tmp = force_reg (imode, CONST0_RTX (imode));
22636 else
22637 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22638 src, pc_rtx, pc_rtx);
22640 rtx tmp2 = gen_reg_rtx (imode);
22641 emit_insn (unpack (tmp2, src, tmp));
22642 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22646 /* Expand conditional increment or decrement using adc/sbb instructions.
22647 The default case using setcc followed by the conditional move can be
22648 done by generic code. */
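/* For example, x += (a <u b) comes out roughly as a cmp followed by
   "adc $0, x", and the corresponding decrement as a cmp followed by
   "sbb $0, x"; conditions other than LTU are handled by reversing the
   compare and using -1 for the val constant below.  */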
22649 bool
22650 ix86_expand_int_addcc (rtx operands[])
22652 enum rtx_code code = GET_CODE (operands[1]);
22653 rtx flags;
22654 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22655 rtx compare_op;
22656 rtx val = const0_rtx;
22657 bool fpcmp = false;
22658 machine_mode mode;
22659 rtx op0 = XEXP (operands[1], 0);
22660 rtx op1 = XEXP (operands[1], 1);
22662 if (operands[3] != const1_rtx
22663 && operands[3] != constm1_rtx)
22664 return false;
22665 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22666 return false;
22667 code = GET_CODE (compare_op);
22669 flags = XEXP (compare_op, 0);
22671 if (GET_MODE (flags) == CCFPmode
22672 || GET_MODE (flags) == CCFPUmode)
22674 fpcmp = true;
22675 code = ix86_fp_compare_code_to_integer (code);
22678 if (code != LTU)
22680 val = constm1_rtx;
22681 if (fpcmp)
22682 PUT_CODE (compare_op,
22683 reverse_condition_maybe_unordered
22684 (GET_CODE (compare_op)));
22685 else
22686 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22689 mode = GET_MODE (operands[0]);
22691 /* Construct either adc or sbb insn. */
22692 if ((code == LTU) == (operands[3] == constm1_rtx))
22694 switch (mode)
22696 case QImode:
22697 insn = gen_subqi3_carry;
22698 break;
22699 case HImode:
22700 insn = gen_subhi3_carry;
22701 break;
22702 case SImode:
22703 insn = gen_subsi3_carry;
22704 break;
22705 case DImode:
22706 insn = gen_subdi3_carry;
22707 break;
22708 default:
22709 gcc_unreachable ();
22712 else
22714 switch (mode)
22716 case QImode:
22717 insn = gen_addqi3_carry;
22718 break;
22719 case HImode:
22720 insn = gen_addhi3_carry;
22721 break;
22722 case SImode:
22723 insn = gen_addsi3_carry;
22724 break;
22725 case DImode:
22726 insn = gen_adddi3_carry;
22727 break;
22728 default:
22729 gcc_unreachable ();
22732 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22734 return true;
22738 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22739 but works for floating point parameters and non-offsettable memories.
22740 For pushes, it returns just stack offsets; the values will be saved
22741 in the right order. Maximally four parts are generated. */
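/* For example (illustrative summary): on a 32-bit target a DFmode operand
   splits into two SImode parts, XFmode into three and TFmode into four,
   while on a 64-bit target XFmode and TFmode split into two word-sized
   parts.  */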
22743 static int
22744 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22746 int size;
22748 if (!TARGET_64BIT)
22749 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22750 else
22751 size = (GET_MODE_SIZE (mode) + 4) / 8;
22753 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22754 gcc_assert (size >= 2 && size <= 4);
22756 /* Optimize constant pool reference to immediates. This is used by fp
22757 moves, which force all constants to memory to allow combining. */
22758 if (MEM_P (operand) && MEM_READONLY_P (operand))
22760 rtx tmp = maybe_get_pool_constant (operand);
22761 if (tmp)
22762 operand = tmp;
22765 if (MEM_P (operand) && !offsettable_memref_p (operand))
22767 /* The only non-offsettable memories we handle are pushes. */
22768 int ok = push_operand (operand, VOIDmode);
22770 gcc_assert (ok);
22772 operand = copy_rtx (operand);
22773 PUT_MODE (operand, word_mode);
22774 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22775 return size;
22778 if (GET_CODE (operand) == CONST_VECTOR)
22780 machine_mode imode = int_mode_for_mode (mode);
22781 /* Caution: if we looked through a constant pool memory above,
22782 the operand may actually have a different mode now. That's
22783 ok, since we want to pun this all the way back to an integer. */
22784 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22785 gcc_assert (operand != NULL);
22786 mode = imode;
22789 if (!TARGET_64BIT)
22791 if (mode == DImode)
22792 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22793 else
22795 int i;
22797 if (REG_P (operand))
22799 gcc_assert (reload_completed);
22800 for (i = 0; i < size; i++)
22801 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22803 else if (offsettable_memref_p (operand))
22805 operand = adjust_address (operand, SImode, 0);
22806 parts[0] = operand;
22807 for (i = 1; i < size; i++)
22808 parts[i] = adjust_address (operand, SImode, 4 * i);
22810 else if (CONST_DOUBLE_P (operand))
22812 REAL_VALUE_TYPE r;
22813 long l[4];
22815 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22816 switch (mode)
22818 case TFmode:
22819 real_to_target (l, &r, mode);
22820 parts[3] = gen_int_mode (l[3], SImode);
22821 parts[2] = gen_int_mode (l[2], SImode);
22822 break;
22823 case XFmode:
22824 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22825 long double may not be 80-bit. */
22826 real_to_target (l, &r, mode);
22827 parts[2] = gen_int_mode (l[2], SImode);
22828 break;
22829 case DFmode:
22830 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22831 break;
22832 default:
22833 gcc_unreachable ();
22835 parts[1] = gen_int_mode (l[1], SImode);
22836 parts[0] = gen_int_mode (l[0], SImode);
22838 else
22839 gcc_unreachable ();
22842 else
22844 if (mode == TImode)
22845 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22846 if (mode == XFmode || mode == TFmode)
22848 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22849 if (REG_P (operand))
22851 gcc_assert (reload_completed);
22852 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22853 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22855 else if (offsettable_memref_p (operand))
22857 operand = adjust_address (operand, DImode, 0);
22858 parts[0] = operand;
22859 parts[1] = adjust_address (operand, upper_mode, 8);
22861 else if (CONST_DOUBLE_P (operand))
22863 REAL_VALUE_TYPE r;
22864 long l[4];
22866 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22867 real_to_target (l, &r, mode);
22869 /* real_to_target puts 32-bit pieces in each long. */
22870 parts[0] =
22871 gen_int_mode
22872 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22873 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22874 DImode);
22876 if (upper_mode == SImode)
22877 parts[1] = gen_int_mode (l[2], SImode);
22878 else
22879 parts[1] =
22880 gen_int_mode
22881 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22882 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22883 DImode);
22885 else
22886 gcc_unreachable ();
22890 return size;
22893 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22894 All required insns are emitted here, so nothing is returned.
22895 Operands 2 and up are used internally to hold the destination and
22896 source parts in the correct order while the moves are emitted. */
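/* Roughly speaking (illustrative summary): a DImode move on a 32-bit
   target is split here into two SImode moves, and the moves are ordered
   below so that a register appearing in the source address is not
   clobbered before the last load that needs it.  */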
22898 void
22899 ix86_split_long_move (rtx operands[])
22901 rtx part[2][4];
22902 int nparts, i, j;
22903 int push = 0;
22904 int collisions = 0;
22905 machine_mode mode = GET_MODE (operands[0]);
22906 bool collisionparts[4];
22908 /* The DFmode expanders may ask us to move a double.
22909 For a 64bit target this is a single move. By hiding the fact
22910 here we simplify the i386.md splitters. */
22911 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22913 /* Optimize constant pool reference to immediates. This is used by
22914 fp moves, which force all constants to memory to allow combining. */
22916 if (MEM_P (operands[1])
22917 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22918 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22919 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22920 if (push_operand (operands[0], VOIDmode))
22922 operands[0] = copy_rtx (operands[0]);
22923 PUT_MODE (operands[0], word_mode);
22925 else
22926 operands[0] = gen_lowpart (DImode, operands[0]);
22927 operands[1] = gen_lowpart (DImode, operands[1]);
22928 emit_move_insn (operands[0], operands[1]);
22929 return;
22932 /* The only non-offsettable memory we handle is push. */
22933 if (push_operand (operands[0], VOIDmode))
22934 push = 1;
22935 else
22936 gcc_assert (!MEM_P (operands[0])
22937 || offsettable_memref_p (operands[0]));
22939 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22940 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22942 /* When emitting a push, take care of source operands on the stack. */
22943 if (push && MEM_P (operands[1])
22944 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22946 rtx src_base = XEXP (part[1][nparts - 1], 0);
22948 /* Compensate for the stack decrement by 4. */
22949 if (!TARGET_64BIT && nparts == 3
22950 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22951 src_base = plus_constant (Pmode, src_base, 4);
22953 /* src_base refers to the stack pointer and is
22954 automatically decreased by the emitted pushes. */
22955 for (i = 0; i < nparts; i++)
22956 part[1][i] = change_address (part[1][i],
22957 GET_MODE (part[1][i]), src_base);
22960 /* We need to do the copy in the right order in case an address register
22961 of the source overlaps the destination. */
22962 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22964 rtx tmp;
22966 for (i = 0; i < nparts; i++)
22968 collisionparts[i]
22969 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22970 if (collisionparts[i])
22971 collisions++;
22974 /* Collision in the middle part can be handled by reordering. */
22975 if (collisions == 1 && nparts == 3 && collisionparts [1])
22977 std::swap (part[0][1], part[0][2]);
22978 std::swap (part[1][1], part[1][2]);
22980 else if (collisions == 1
22981 && nparts == 4
22982 && (collisionparts [1] || collisionparts [2]))
22984 if (collisionparts [1])
22986 std::swap (part[0][1], part[0][2]);
22987 std::swap (part[1][1], part[1][2]);
22989 else
22991 std::swap (part[0][2], part[0][3]);
22992 std::swap (part[1][2], part[1][3]);
22996 /* If there are more collisions, we can't handle it by reordering.
22997 Do an lea to the last part and use only one colliding move. */
22998 else if (collisions > 1)
23000 rtx base, addr, tls_base = NULL_RTX;
23002 collisions = 1;
23004 base = part[0][nparts - 1];
23006 /* Handle the case when the last part isn't valid for lea.
23007 Happens in 64-bit mode storing the 12-byte XFmode. */
23008 if (GET_MODE (base) != Pmode)
23009 base = gen_rtx_REG (Pmode, REGNO (base));
23011 addr = XEXP (part[1][0], 0);
23012 if (TARGET_TLS_DIRECT_SEG_REFS)
23014 struct ix86_address parts;
23015 int ok = ix86_decompose_address (addr, &parts);
23016 gcc_assert (ok);
23017 if (parts.seg == DEFAULT_TLS_SEG_REG)
23019 /* It is not valid to use %gs: or %fs: in
23020 lea though, so we need to remove it from the
23021 address used for lea and add it to each individual
23022 memory load instead. */
23023 addr = copy_rtx (addr);
23024 rtx *x = &addr;
23025 while (GET_CODE (*x) == PLUS)
23027 for (i = 0; i < 2; i++)
23029 rtx u = XEXP (*x, i);
23030 if (GET_CODE (u) == ZERO_EXTEND)
23031 u = XEXP (u, 0);
23032 if (GET_CODE (u) == UNSPEC
23033 && XINT (u, 1) == UNSPEC_TP)
23035 tls_base = XEXP (*x, i);
23036 *x = XEXP (*x, 1 - i);
23037 break;
23040 if (tls_base)
23041 break;
23042 x = &XEXP (*x, 0);
23044 gcc_assert (tls_base);
23047 emit_insn (gen_rtx_SET (base, addr));
23048 if (tls_base)
23049 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
23050 part[1][0] = replace_equiv_address (part[1][0], base);
23051 for (i = 1; i < nparts; i++)
23053 if (tls_base)
23054 base = copy_rtx (base);
23055 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
23056 part[1][i] = replace_equiv_address (part[1][i], tmp);
23061 if (push)
23063 if (!TARGET_64BIT)
23065 if (nparts == 3)
23067 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
23068 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
23069 stack_pointer_rtx, GEN_INT (-4)));
23070 emit_move_insn (part[0][2], part[1][2]);
23072 else if (nparts == 4)
23074 emit_move_insn (part[0][3], part[1][3]);
23075 emit_move_insn (part[0][2], part[1][2]);
23078 else
23080 /* In 64bit mode we don't have 32bit push available. In case this is a
23081 register, it is OK - we will just use the larger counterpart. We also
23082 retype memory - these come from an attempt to avoid a REX prefix on
23083 moving of the second half of a TFmode value. */
23084 if (GET_MODE (part[1][1]) == SImode)
23086 switch (GET_CODE (part[1][1]))
23088 case MEM:
23089 part[1][1] = adjust_address (part[1][1], DImode, 0);
23090 break;
23092 case REG:
23093 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23094 break;
23096 default:
23097 gcc_unreachable ();
23100 if (GET_MODE (part[1][0]) == SImode)
23101 part[1][0] = part[1][1];
23104 emit_move_insn (part[0][1], part[1][1]);
23105 emit_move_insn (part[0][0], part[1][0]);
23106 return;
23109 /* Choose correct order to not overwrite the source before it is copied. */
23110 if ((REG_P (part[0][0])
23111 && REG_P (part[1][1])
23112 && (REGNO (part[0][0]) == REGNO (part[1][1])
23113 || (nparts == 3
23114 && REGNO (part[0][0]) == REGNO (part[1][2]))
23115 || (nparts == 4
23116 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23117 || (collisions > 0
23118 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23120 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23122 operands[2 + i] = part[0][j];
23123 operands[6 + i] = part[1][j];
23126 else
23128 for (i = 0; i < nparts; i++)
23130 operands[2 + i] = part[0][i];
23131 operands[6 + i] = part[1][i];
23135 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23136 if (optimize_insn_for_size_p ())
23138 for (j = 0; j < nparts - 1; j++)
23139 if (CONST_INT_P (operands[6 + j])
23140 && operands[6 + j] != const0_rtx
23141 && REG_P (operands[2 + j]))
23142 for (i = j; i < nparts - 1; i++)
23143 if (CONST_INT_P (operands[7 + i])
23144 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23145 operands[7 + i] = operands[2 + j];
23148 for (i = 0; i < nparts; i++)
23149 emit_move_insn (operands[2 + i], operands[6 + i]);
23151 return;
23154 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23155 left shift by a constant, either using a single shift or
23156 a sequence of add instructions. */
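/* For instance (illustrative): a left shift by 1 is emitted as
   "add reg, reg", and a shift by a small constant may become a short run
   of such adds when the cost tables say that is cheaper than a single
   "shl $N, reg"; otherwise one shift instruction is emitted.  */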
23158 static void
23159 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23161 rtx (*insn)(rtx, rtx, rtx);
23163 if (count == 1
23164 || (count * ix86_cost->add <= ix86_cost->shift_const
23165 && !optimize_insn_for_size_p ()))
23167 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23168 while (count-- > 0)
23169 emit_insn (insn (operand, operand, operand));
23171 else
23173 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23174 emit_insn (insn (operand, operand, GEN_INT (count)));
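/* Descriptive note for the double-word shift splitters that follow
   (summary only, the behaviour is defined by the code): ix86_split_ashl
   splits a DImode (32-bit target) or TImode (64-bit target) left shift
   into word-sized operations. For a constant count of at least half the
   width, e.g. a DImode shift by 40, the low word becomes zero and the
   high word is the old low word shifted by 8; smaller constant counts use
   shld plus a shift, and variable counts get a runtime adjustment (cmove
   or branch) for counts of 32/64 or more.  */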
23178 void
23179 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23181 rtx (*gen_ashl3)(rtx, rtx, rtx);
23182 rtx (*gen_shld)(rtx, rtx, rtx);
23183 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23185 rtx low[2], high[2];
23186 int count;
23188 if (CONST_INT_P (operands[2]))
23190 split_double_mode (mode, operands, 2, low, high);
23191 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23193 if (count >= half_width)
23195 emit_move_insn (high[0], low[1]);
23196 emit_move_insn (low[0], const0_rtx);
23198 if (count > half_width)
23199 ix86_expand_ashl_const (high[0], count - half_width, mode);
23201 else
23203 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23205 if (!rtx_equal_p (operands[0], operands[1]))
23206 emit_move_insn (operands[0], operands[1]);
23208 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23209 ix86_expand_ashl_const (low[0], count, mode);
23211 return;
23214 split_double_mode (mode, operands, 1, low, high);
23216 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23218 if (operands[1] == const1_rtx)
23220 /* Assuming we've chosen QImode-capable registers, then 1 << N
23221 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23222 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23224 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23226 ix86_expand_clear (low[0]);
23227 ix86_expand_clear (high[0]);
23228 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23230 d = gen_lowpart (QImode, low[0]);
23231 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23232 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23233 emit_insn (gen_rtx_SET (d, s));
23235 d = gen_lowpart (QImode, high[0]);
23236 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23237 s = gen_rtx_NE (QImode, flags, const0_rtx);
23238 emit_insn (gen_rtx_SET (d, s));
23241 /* Otherwise, we can get the same results by manually performing
23242 a bit extract operation on bit 5/6, and then performing the two
23243 shifts. The two methods of getting 0/1 into low/high are exactly
23244 the same size. Avoiding the shift in the bit extract case helps
23245 pentium4 a bit; no one else seems to care much either way. */
23246 else
23248 machine_mode half_mode;
23249 rtx (*gen_lshr3)(rtx, rtx, rtx);
23250 rtx (*gen_and3)(rtx, rtx, rtx);
23251 rtx (*gen_xor3)(rtx, rtx, rtx);
23252 HOST_WIDE_INT bits;
23253 rtx x;
23255 if (mode == DImode)
23257 half_mode = SImode;
23258 gen_lshr3 = gen_lshrsi3;
23259 gen_and3 = gen_andsi3;
23260 gen_xor3 = gen_xorsi3;
23261 bits = 5;
23263 else
23265 half_mode = DImode;
23266 gen_lshr3 = gen_lshrdi3;
23267 gen_and3 = gen_anddi3;
23268 gen_xor3 = gen_xordi3;
23269 bits = 6;
23272 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23273 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23274 else
23275 x = gen_lowpart (half_mode, operands[2]);
23276 emit_insn (gen_rtx_SET (high[0], x));
23278 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23279 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23280 emit_move_insn (low[0], high[0]);
23281 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23284 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23285 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23286 return;
23289 if (operands[1] == constm1_rtx)
23291 /* For -1 << N, we can avoid the shld instruction, because we
23292 know that we're shifting 0...31/63 ones into a -1. */
23293 emit_move_insn (low[0], constm1_rtx);
23294 if (optimize_insn_for_size_p ())
23295 emit_move_insn (high[0], low[0]);
23296 else
23297 emit_move_insn (high[0], constm1_rtx);
23299 else
23301 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23303 if (!rtx_equal_p (operands[0], operands[1]))
23304 emit_move_insn (operands[0], operands[1]);
23306 split_double_mode (mode, operands, 1, low, high);
23307 emit_insn (gen_shld (high[0], low[0], operands[2]));
23310 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23312 if (TARGET_CMOVE && scratch)
23314 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23315 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23317 ix86_expand_clear (scratch);
23318 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23320 else
23322 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23323 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23325 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23329 void
23330 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23332 rtx (*gen_ashr3)(rtx, rtx, rtx)
23333 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23334 rtx (*gen_shrd)(rtx, rtx, rtx);
23335 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23337 rtx low[2], high[2];
23338 int count;
23340 if (CONST_INT_P (operands[2]))
23342 split_double_mode (mode, operands, 2, low, high);
23343 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23345 if (count == GET_MODE_BITSIZE (mode) - 1)
23347 emit_move_insn (high[0], high[1]);
23348 emit_insn (gen_ashr3 (high[0], high[0],
23349 GEN_INT (half_width - 1)));
23350 emit_move_insn (low[0], high[0]);
23353 else if (count >= half_width)
23355 emit_move_insn (low[0], high[1]);
23356 emit_move_insn (high[0], low[0]);
23357 emit_insn (gen_ashr3 (high[0], high[0],
23358 GEN_INT (half_width - 1)));
23360 if (count > half_width)
23361 emit_insn (gen_ashr3 (low[0], low[0],
23362 GEN_INT (count - half_width)));
23364 else
23366 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23368 if (!rtx_equal_p (operands[0], operands[1]))
23369 emit_move_insn (operands[0], operands[1]);
23371 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23372 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23375 else
23377 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23379 if (!rtx_equal_p (operands[0], operands[1]))
23380 emit_move_insn (operands[0], operands[1]);
23382 split_double_mode (mode, operands, 1, low, high);
23384 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23385 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23387 if (TARGET_CMOVE && scratch)
23389 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23390 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23392 emit_move_insn (scratch, high[0]);
23393 emit_insn (gen_ashr3 (scratch, scratch,
23394 GEN_INT (half_width - 1)));
23395 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23396 scratch));
23398 else
23400 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23401 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23403 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23408 void
23409 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23411 rtx (*gen_lshr3)(rtx, rtx, rtx)
23412 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23413 rtx (*gen_shrd)(rtx, rtx, rtx);
23414 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23416 rtx low[2], high[2];
23417 int count;
23419 if (CONST_INT_P (operands[2]))
23421 split_double_mode (mode, operands, 2, low, high);
23422 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23424 if (count >= half_width)
23426 emit_move_insn (low[0], high[1]);
23427 ix86_expand_clear (high[0]);
23429 if (count > half_width)
23430 emit_insn (gen_lshr3 (low[0], low[0],
23431 GEN_INT (count - half_width)));
23433 else
23435 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23437 if (!rtx_equal_p (operands[0], operands[1]))
23438 emit_move_insn (operands[0], operands[1]);
23440 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23441 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23444 else
23446 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23448 if (!rtx_equal_p (operands[0], operands[1]))
23449 emit_move_insn (operands[0], operands[1]);
23451 split_double_mode (mode, operands, 1, low, high);
23453 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23454 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23456 if (TARGET_CMOVE && scratch)
23458 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23459 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23461 ix86_expand_clear (scratch);
23462 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23463 scratch));
23465 else
23467 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23468 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23470 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23475 /* Predict just emitted jump instruction to be taken with probability PROB. */
23476 static void
23477 predict_jump (int prob)
23479 rtx insn = get_last_insn ();
23480 gcc_assert (JUMP_P (insn));
23481 add_int_reg_note (insn, REG_BR_PROB, prob);
23484 /* Helper function for the string operations below. Test whether (VARIABLE
23485 & VALUE) is zero, i.e. whether VARIABLE is aligned to VALUE bytes; if so, jump to the returned label. */
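/* For example (illustrative): ix86_expand_aligntest (count, 4, true)
   emits the equivalent of "test $4, count" followed by a jump to the
   returned label when that bit is clear, so the caller can place a 4-byte
   move between this call and emit_label and have it run only when
   needed.  */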
23486 static rtx_code_label *
23487 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23489 rtx_code_label *label = gen_label_rtx ();
23490 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23491 if (GET_MODE (variable) == DImode)
23492 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23493 else
23494 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23495 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23496 1, label);
23497 if (epilogue)
23498 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23499 else
23500 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23501 return label;
23504 /* Adjust COUNTER by the VALUE. */
23505 static void
23506 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23508 rtx (*gen_add)(rtx, rtx, rtx)
23509 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23511 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23514 /* Zero extend possibly SImode EXP to Pmode register. */
23516 ix86_zero_extend_to_Pmode (rtx exp)
23518 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23521 /* Divide COUNTREG by SCALE. */
23522 static rtx
23523 scale_counter (rtx countreg, int scale)
23525 rtx sc;
23527 if (scale == 1)
23528 return countreg;
23529 if (CONST_INT_P (countreg))
23530 return GEN_INT (INTVAL (countreg) / scale);
23531 gcc_assert (REG_P (countreg));
23533 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23534 GEN_INT (exact_log2 (scale)),
23535 NULL, 1, OPTAB_DIRECT);
23536 return sc;
23539 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23540 DImode for constant loop counts. */
23542 static machine_mode
23543 counter_mode (rtx count_exp)
23545 if (GET_MODE (count_exp) != VOIDmode)
23546 return GET_MODE (count_exp);
23547 if (!CONST_INT_P (count_exp))
23548 return Pmode;
23549 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23550 return DImode;
23551 return SImode;
23554 /* Copy the address to a Pmode register. This is used for x32 to
23555 truncate DImode TLS address to a SImode register. */
23557 static rtx
23558 ix86_copy_addr_to_reg (rtx addr)
23560 rtx reg;
23561 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23563 reg = copy_addr_to_reg (addr);
23564 REG_POINTER (reg) = 1;
23565 return reg;
23567 else
23569 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23570 reg = copy_to_mode_reg (DImode, addr);
23571 REG_POINTER (reg) = 1;
23572 return gen_rtx_SUBREG (SImode, reg, 0);
23576 /* When ISSETMEM is FALSE, output a simple loop to copy the memory pointed to by SRCPTR
23577 to DESTPTR in chunks of MODE unrolled UNROLL times; the overall size is COUNT,
23578 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23579 memory to VALUE (supposed to be in MODE).
23581 The size is rounded down to a whole number of chunks moved at once.
23582 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
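/* The emitted code corresponds roughly to the following pseudocode for
   the copy case (illustrative only; chunk = GET_MODE_SIZE (MODE) * UNROLL,
   and a zero-size guard is added when the chunk is a single byte):
       size = count & -chunk;  iter = 0;
       do {
         copy chunk bytes from srcptr + iter to destptr + iter;
         iter += chunk;
       } while (iter < size);
       destptr += iter;  srcptr += iter;
   The setmem variant stores VALUE instead of loading from SRCPTR.  */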
23585 static void
23586 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23587 rtx destptr, rtx srcptr, rtx value,
23588 rtx count, machine_mode mode, int unroll,
23589 int expected_size, bool issetmem)
23591 rtx_code_label *out_label, *top_label;
23592 rtx iter, tmp;
23593 machine_mode iter_mode = counter_mode (count);
23594 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23595 rtx piece_size = GEN_INT (piece_size_n);
23596 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23597 rtx size;
23598 int i;
23600 top_label = gen_label_rtx ();
23601 out_label = gen_label_rtx ();
23602 iter = gen_reg_rtx (iter_mode);
23604 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23605 NULL, 1, OPTAB_DIRECT);
23606 /* Those two should combine. */
23607 if (piece_size == const1_rtx)
23609 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23610 true, out_label);
23611 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23613 emit_move_insn (iter, const0_rtx);
23615 emit_label (top_label);
23617 tmp = convert_modes (Pmode, iter_mode, iter, true);
23619 /* This assert could be relaxed - in this case we'll need to compute
23620 the smallest power of two containing PIECE_SIZE_N and pass it to
23621 offset_address. */
23622 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23623 destmem = offset_address (destmem, tmp, piece_size_n);
23624 destmem = adjust_address (destmem, mode, 0);
23626 if (!issetmem)
23628 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23629 srcmem = adjust_address (srcmem, mode, 0);
23631 /* When unrolling for chips that reorder memory reads and writes,
23632 we can save registers by using a single temporary.
23633 Also using 4 temporaries is overkill in 32bit mode. */
23634 if (!TARGET_64BIT && 0)
23636 for (i = 0; i < unroll; i++)
23638 if (i)
23640 destmem =
23641 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23642 srcmem =
23643 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23645 emit_move_insn (destmem, srcmem);
23648 else
23650 rtx tmpreg[4];
23651 gcc_assert (unroll <= 4);
23652 for (i = 0; i < unroll; i++)
23654 tmpreg[i] = gen_reg_rtx (mode);
23655 if (i)
23657 srcmem =
23658 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23660 emit_move_insn (tmpreg[i], srcmem);
23662 for (i = 0; i < unroll; i++)
23664 if (i)
23666 destmem =
23667 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23669 emit_move_insn (destmem, tmpreg[i]);
23673 else
23674 for (i = 0; i < unroll; i++)
23676 if (i)
23677 destmem =
23678 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23679 emit_move_insn (destmem, value);
23682 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23683 true, OPTAB_LIB_WIDEN);
23684 if (tmp != iter)
23685 emit_move_insn (iter, tmp);
23687 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23688 true, top_label);
23689 if (expected_size != -1)
23691 expected_size /= GET_MODE_SIZE (mode) * unroll;
23692 if (expected_size == 0)
23693 predict_jump (0);
23694 else if (expected_size > REG_BR_PROB_BASE)
23695 predict_jump (REG_BR_PROB_BASE - 1);
23696 else
23697 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23699 else
23700 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23701 iter = ix86_zero_extend_to_Pmode (iter);
23702 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23703 true, OPTAB_LIB_WIDEN);
23704 if (tmp != destptr)
23705 emit_move_insn (destptr, tmp);
23706 if (!issetmem)
23708 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23709 true, OPTAB_LIB_WIDEN);
23710 if (tmp != srcptr)
23711 emit_move_insn (srcptr, tmp);
23713 emit_label (out_label);
23716 /* Output a "rep; mov" or "rep; stos" instruction depending on the ISSETMEM argument.
23717 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23718 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23719 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23720 ORIG_VALUE is the original value passed to memset to fill the memory with.
23721 The other arguments have the same meaning as for the previous function. */
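/* Illustrative example: a 32-byte zero memset with a known count expands
   here to roughly "mov $8, %ecx; rep stosl" - the byte count is scaled by
   the chunk size, and DESTEXP/SRCEXP computed below describe the final
   pointer values so the rep pattern's effect on the pointer registers is
   visible in the RTL.  */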
23723 static void
23724 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23725 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23726 rtx count,
23727 machine_mode mode, bool issetmem)
23729 rtx destexp;
23730 rtx srcexp;
23731 rtx countreg;
23732 HOST_WIDE_INT rounded_count;
23734 /* If possible, it is shorter to use rep movs.
23735 TODO: Maybe it is better to move this logic to decide_alg. */
23736 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23737 && (!issetmem || orig_value == const0_rtx))
23738 mode = SImode;
23740 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23741 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23743 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23744 GET_MODE_SIZE (mode)));
23745 if (mode != QImode)
23747 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23748 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23749 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23751 else
23752 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23753 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23755 rounded_count = (INTVAL (count)
23756 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23757 destmem = shallow_copy_rtx (destmem);
23758 set_mem_size (destmem, rounded_count);
23760 else if (MEM_SIZE_KNOWN_P (destmem))
23761 clear_mem_size (destmem);
23763 if (issetmem)
23765 value = force_reg (mode, gen_lowpart (mode, value));
23766 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23768 else
23770 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23771 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23772 if (mode != QImode)
23774 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23775 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23776 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23778 else
23779 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23780 if (CONST_INT_P (count))
23782 rounded_count = (INTVAL (count)
23783 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23784 srcmem = shallow_copy_rtx (srcmem);
23785 set_mem_size (srcmem, rounded_count);
23787 else
23789 if (MEM_SIZE_KNOWN_P (srcmem))
23790 clear_mem_size (srcmem);
23792 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23793 destexp, srcexp));
23797 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23798 DESTMEM.
23799 SRCMEM is passed by pointer so it can be updated on return.
23800 The return value is the updated DESTMEM. */
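/* For example (illustrative): with SIZE_TO_MOVE == 16 and SSE enabled
   this typically selects a 16-byte vector mode and emits one load/store
   pair through a temporary register; when no wide enough move pattern
   exists it falls back to word_mode pieces, emitting one load/store pair
   per piece and bumping DESTPTR/SRCPTR after each one.  */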
23801 static rtx
23802 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23803 HOST_WIDE_INT size_to_move)
23805 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23806 enum insn_code code;
23807 machine_mode move_mode;
23808 int piece_size, i;
23810 /* Find the widest mode in which we could perform moves.
23811 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23812 it until a move of that size is supported. */
23813 piece_size = 1 << floor_log2 (size_to_move);
23814 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23815 code = optab_handler (mov_optab, move_mode);
23816 while (code == CODE_FOR_nothing && piece_size > 1)
23818 piece_size >>= 1;
23819 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23820 code = optab_handler (mov_optab, move_mode);
23823 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23824 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23825 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23827 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23828 move_mode = mode_for_vector (word_mode, nunits);
23829 code = optab_handler (mov_optab, move_mode);
23830 if (code == CODE_FOR_nothing)
23832 move_mode = word_mode;
23833 piece_size = GET_MODE_SIZE (move_mode);
23834 code = optab_handler (mov_optab, move_mode);
23837 gcc_assert (code != CODE_FOR_nothing);
23839 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23840 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23842 /* Emit moves. We'll need SIZE_TO_MOVE / PIECE_SIZE moves. */
23843 gcc_assert (size_to_move % piece_size == 0);
23844 adjust = GEN_INT (piece_size);
23845 for (i = 0; i < size_to_move; i += piece_size)
23847 /* We move from memory to memory, so we'll need to do it via
23848 a temporary register. */
23849 tempreg = gen_reg_rtx (move_mode);
23850 emit_insn (GEN_FCN (code) (tempreg, src));
23851 emit_insn (GEN_FCN (code) (dst, tempreg));
23853 emit_move_insn (destptr,
23854 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23855 emit_move_insn (srcptr,
23856 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23858 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23859 piece_size);
23860 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23861 piece_size);
23864 /* Update DST and SRC rtx. */
23865 *srcmem = src;
23866 return dst;
23869 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
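/* Illustrative example: with a constant count whose remainder modulo
   MAX_SIZE is 11, the constant branch below emits moves of 8, 2 and 1
   bytes (one per set bit); with a non-constant count the individual bits
   of COUNT are instead tested at run time, or a byte loop is used when
   MAX_SIZE exceeds 8.  */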
23870 static void
23871 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23872 rtx destptr, rtx srcptr, rtx count, int max_size)
23874 rtx src, dest;
23875 if (CONST_INT_P (count))
23877 HOST_WIDE_INT countval = INTVAL (count);
23878 HOST_WIDE_INT epilogue_size = countval % max_size;
23879 int i;
23881 /* For now MAX_SIZE should be a power of 2. This assert could be
23882 relaxed, but it'll require a bit more complicated epilogue
23883 expanding. */
23884 gcc_assert ((max_size & (max_size - 1)) == 0);
23885 for (i = max_size; i >= 1; i >>= 1)
23887 if (epilogue_size & i)
23888 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23890 return;
23892 if (max_size > 8)
23894 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23895 count, 1, OPTAB_DIRECT);
23896 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23897 count, QImode, 1, 4, false);
23898 return;
23901 /* When there are stringops, we can cheaply increase dest and src pointers.
23902 Otherwise we save code size by maintaining offset (zero is readily
23903 available from the preceding rep operation) and using x86 addressing modes. */
23905 if (TARGET_SINGLE_STRINGOP)
23907 if (max_size > 4)
23909 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23910 src = change_address (srcmem, SImode, srcptr);
23911 dest = change_address (destmem, SImode, destptr);
23912 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23913 emit_label (label);
23914 LABEL_NUSES (label) = 1;
23916 if (max_size > 2)
23918 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23919 src = change_address (srcmem, HImode, srcptr);
23920 dest = change_address (destmem, HImode, destptr);
23921 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23922 emit_label (label);
23923 LABEL_NUSES (label) = 1;
23925 if (max_size > 1)
23927 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23928 src = change_address (srcmem, QImode, srcptr);
23929 dest = change_address (destmem, QImode, destptr);
23930 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23931 emit_label (label);
23932 LABEL_NUSES (label) = 1;
23935 else
23937 rtx offset = force_reg (Pmode, const0_rtx);
23938 rtx tmp;
23940 if (max_size > 4)
23942 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23943 src = change_address (srcmem, SImode, srcptr);
23944 dest = change_address (destmem, SImode, destptr);
23945 emit_move_insn (dest, src);
23946 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23947 true, OPTAB_LIB_WIDEN);
23948 if (tmp != offset)
23949 emit_move_insn (offset, tmp);
23950 emit_label (label);
23951 LABEL_NUSES (label) = 1;
23953 if (max_size > 2)
23955 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23956 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23957 src = change_address (srcmem, HImode, tmp);
23958 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23959 dest = change_address (destmem, HImode, tmp);
23960 emit_move_insn (dest, src);
23961 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23962 true, OPTAB_LIB_WIDEN);
23963 if (tmp != offset)
23964 emit_move_insn (offset, tmp);
23965 emit_label (label);
23966 LABEL_NUSES (label) = 1;
23968 if (max_size > 1)
23970 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23971 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23972 src = change_address (srcmem, QImode, tmp);
23973 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23974 dest = change_address (destmem, QImode, tmp);
23975 emit_move_insn (dest, src);
23976 emit_label (label);
23977 LABEL_NUSES (label) = 1;
23982 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
23983 with the value PROMOTED_VAL.
23984 Unlike emit_memmov there is no source operand to update.
23985 The return value is the updated DESTMEM. */
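/* For example (illustrative): a 16-byte fill with a value promoted to an
   SSE register becomes a single vector store, while word-sized or smaller
   pieces go through gen_strset, which also advances DESTPTR.  */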
23986 static rtx
23987 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23988 HOST_WIDE_INT size_to_move)
23990 rtx dst = destmem, adjust;
23991 enum insn_code code;
23992 machine_mode move_mode;
23993 int piece_size, i;
23995 /* Find the widest mode in which we could perform moves.
23996 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23997 it until move of such size is supported. */
23998 move_mode = GET_MODE (promoted_val);
23999 if (move_mode == VOIDmode)
24000 move_mode = QImode;
24001 if (size_to_move < GET_MODE_SIZE (move_mode))
24003 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
24004 promoted_val = gen_lowpart (move_mode, promoted_val);
24006 piece_size = GET_MODE_SIZE (move_mode);
24007 code = optab_handler (mov_optab, move_mode);
24008 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
24010 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
24012 /* Emit moves. We'll need SIZE_TO_MOVE / PIECE_SIZE moves. */
24013 gcc_assert (size_to_move % piece_size == 0);
24014 adjust = GEN_INT (piece_size);
24015 for (i = 0; i < size_to_move; i += piece_size)
24017 if (piece_size <= GET_MODE_SIZE (word_mode))
24019 emit_insn (gen_strset (destptr, dst, promoted_val));
24020 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24021 piece_size);
24022 continue;
24025 emit_insn (GEN_FCN (code) (dst, promoted_val));
24027 emit_move_insn (destptr,
24028 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
24030 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24031 piece_size);
24034 /* Update DST rtx. */
24035 return dst;
24037 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
24038 static void
24039 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
24040 rtx count, int max_size)
24042 count =
24043 expand_simple_binop (counter_mode (count), AND, count,
24044 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
24045 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
24046 gen_lowpart (QImode, value), count, QImode,
24047 1, max_size / 2, true);
24050 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
24051 static void
24052 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
24053 rtx count, int max_size)
24055 rtx dest;
24057 if (CONST_INT_P (count))
24059 HOST_WIDE_INT countval = INTVAL (count);
24060 HOST_WIDE_INT epilogue_size = countval % max_size;
24061 int i;
24063 /* For now MAX_SIZE should be a power of 2. This assert could be
24064 relaxed, but it'll require a bit more complicated epilogue
24065 expanding. */
24066 gcc_assert ((max_size & (max_size - 1)) == 0);
24067 for (i = max_size; i >= 1; i >>= 1)
24069 if (epilogue_size & i)
24071 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24072 destmem = emit_memset (destmem, destptr, vec_value, i);
24073 else
24074 destmem = emit_memset (destmem, destptr, value, i);
24077 return;
24079 if (max_size > 32)
24081 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
24082 return;
24084 if (max_size > 16)
24086 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
24087 if (TARGET_64BIT)
24089 dest = change_address (destmem, DImode, destptr);
24090 emit_insn (gen_strset (destptr, dest, value));
24091 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
24092 emit_insn (gen_strset (destptr, dest, value));
24094 else
24096 dest = change_address (destmem, SImode, destptr);
24097 emit_insn (gen_strset (destptr, dest, value));
24098 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24099 emit_insn (gen_strset (destptr, dest, value));
24100 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24101 emit_insn (gen_strset (destptr, dest, value));
24102 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24103 emit_insn (gen_strset (destptr, dest, value));
24105 emit_label (label);
24106 LABEL_NUSES (label) = 1;
24108 if (max_size > 8)
24110 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24111 if (TARGET_64BIT)
24113 dest = change_address (destmem, DImode, destptr);
24114 emit_insn (gen_strset (destptr, dest, value));
24116 else
24118 dest = change_address (destmem, SImode, destptr);
24119 emit_insn (gen_strset (destptr, dest, value));
24120 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24121 emit_insn (gen_strset (destptr, dest, value));
24123 emit_label (label);
24124 LABEL_NUSES (label) = 1;
24126 if (max_size > 4)
24128 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24129 dest = change_address (destmem, SImode, destptr);
24130 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24131 emit_label (label);
24132 LABEL_NUSES (label) = 1;
24134 if (max_size > 2)
24136 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24137 dest = change_address (destmem, HImode, destptr);
24138 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24139 emit_label (label);
24140 LABEL_NUSES (label) = 1;
24142 if (max_size > 1)
24144 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24145 dest = change_address (destmem, QImode, destptr);
24146 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24147 emit_label (label);
24148 LABEL_NUSES (label) = 1;
24152 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
24153 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
24154 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24155 ignored.
24156 Return value is updated DESTMEM. */
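/* For example (illustrative): with ALIGN == 1 and DESIRED_ALIGNMENT == 16
   this emits up to four conditional blocks that copy (or set) 1, 2, 4 and
   8 bytes, each guarded by an alignment test on DESTPTR, so that after
   the prologue DESTPTR is 16-byte aligned and COUNT has been reduced
   accordingly.  */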
24157 static rtx
24158 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24159 rtx destptr, rtx srcptr, rtx value,
24160 rtx vec_value, rtx count, int align,
24161 int desired_alignment, bool issetmem)
24163 int i;
24164 for (i = 1; i < desired_alignment; i <<= 1)
24166 if (align <= i)
24168 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24169 if (issetmem)
24171 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24172 destmem = emit_memset (destmem, destptr, vec_value, i);
24173 else
24174 destmem = emit_memset (destmem, destptr, value, i);
24176 else
24177 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24178 ix86_adjust_counter (count, i);
24179 emit_label (label);
24180 LABEL_NUSES (label) = 1;
24181 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24184 return destmem;
24187 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24188 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24189 and jump to DONE_LABEL. */
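/* The trick used below (illustrative summary): for SIZE == 4, when the
   4 bit of COUNT is set and larger counts have been ruled out by the
   caller, one 4-byte move from the start of the block and one 4-byte move
   ending at its last byte cover the whole block, possibly overlapping, so
   no loop or byte-by-byte tail is needed.  */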
24190 static void
24191 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24192 rtx destptr, rtx srcptr,
24193 rtx value, rtx vec_value,
24194 rtx count, int size,
24195 rtx done_label, bool issetmem)
24197 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24198 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24199 rtx modesize;
24200 int n;
24202 /* If we do not have vector value to copy, we must reduce size. */
24203 if (issetmem)
24205 if (!vec_value)
24207 if (GET_MODE (value) == VOIDmode && size > 8)
24208 mode = Pmode;
24209 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24210 mode = GET_MODE (value);
24212 else
24213 mode = GET_MODE (vec_value), value = vec_value;
24215 else
24217 /* Choose appropriate vector mode. */
24218 if (size >= 32)
24219 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24220 else if (size >= 16)
24221 mode = TARGET_SSE ? V16QImode : DImode;
24222 srcmem = change_address (srcmem, mode, srcptr);
24224 destmem = change_address (destmem, mode, destptr);
24225 modesize = GEN_INT (GET_MODE_SIZE (mode));
24226 gcc_assert (GET_MODE_SIZE (mode) <= size);
24227 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24229 if (issetmem)
24230 emit_move_insn (destmem, gen_lowpart (mode, value));
24231 else
24233 emit_move_insn (destmem, srcmem);
24234 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24236 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24239 destmem = offset_address (destmem, count, 1);
24240 destmem = offset_address (destmem, GEN_INT (-2 * size),
24241 GET_MODE_SIZE (mode));
24242 if (!issetmem)
24244 srcmem = offset_address (srcmem, count, 1);
24245 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24246 GET_MODE_SIZE (mode));
24248 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24250 if (issetmem)
24251 emit_move_insn (destmem, gen_lowpart (mode, value));
24252 else
24254 emit_move_insn (destmem, srcmem);
24255 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24257 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24259 emit_jump_insn (gen_jump (done_label));
24260 emit_barrier ();
24262 emit_label (label);
24263 LABEL_NUSES (label) = 1;
24266 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24267 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24268 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
24269 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24270 DONE_LABEL is a label after the whole copying sequence. The label is created
24271 on demand if *DONE_LABEL is NULL.
24272 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
24273 bounds after the initial copies.
24275 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24276 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24277 we will dispatch to a library call for large blocks.
24279 In pseudocode we do:
24281 if (COUNT < SIZE)
24283 Assume that SIZE is 4. Bigger sizes are handled analogously
24284 if (COUNT & 4)
24286 copy 4 bytes from SRCPTR to DESTPTR
24287 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24288 goto done_label
24290 if (!COUNT)
24291 goto done_label;
24292 copy 1 byte from SRCPTR to DESTPTR
24293 if (COUNT & 2)
24295 copy 2 bytes from SRCPTR to DESTPTR
24296 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24299 else
24301 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24302 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24304 OLD_DESTPTR = DESTPTR;
24305 Align DESTPTR up to DESIRED_ALIGN
24306 SRCPTR += DESTPTR - OLD_DESTPTR
24307 COUNT -= DEST_PTR - OLD_DESTPTR
24308 if (DYNAMIC_CHECK)
24309 Round COUNT down to multiple of SIZE
24310 << optional caller supplied zero size guard is here >>
24311 << optional caller supplied dynamic check is here >>
24312 << caller supplied main copy loop is here >>
24314 done_label: */
24316 static void
24317 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24318 rtx *destptr, rtx *srcptr,
24319 machine_mode mode,
24320 rtx value, rtx vec_value,
24321 rtx *count,
24322 rtx_code_label **done_label,
24323 int size,
24324 int desired_align,
24325 int align,
24326 unsigned HOST_WIDE_INT *min_size,
24327 bool dynamic_check,
24328 bool issetmem)
24330 rtx_code_label *loop_label = NULL, *label;
24331 int n;
24332 rtx modesize;
24333 int prolog_size = 0;
24334 rtx mode_value;
24336 /* Choose the proper value to copy. */
24337 if (issetmem && VECTOR_MODE_P (mode))
24338 mode_value = vec_value;
24339 else
24340 mode_value = value;
24341 gcc_assert (GET_MODE_SIZE (mode) <= size);
24343 /* See if block is big or small, handle small blocks. */
24344 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24346 int size2 = size;
24347 loop_label = gen_label_rtx ();
24349 if (!*done_label)
24350 *done_label = gen_label_rtx ();
24352 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24353 1, loop_label);
24354 size2 >>= 1;
24356 /* Handle sizes > 3. */
24357 for (;size2 > 2; size2 >>= 1)
24358 expand_small_movmem_or_setmem (destmem, srcmem,
24359 *destptr, *srcptr,
24360 value, vec_value,
24361 *count,
24362 size2, *done_label, issetmem);
24363 /* Nothing to copy? Jump to DONE_LABEL if so */
24364 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24365 1, *done_label);
24367 /* Do a byte copy. */
24368 destmem = change_address (destmem, QImode, *destptr);
24369 if (issetmem)
24370 emit_move_insn (destmem, gen_lowpart (QImode, value));
24371 else
24373 srcmem = change_address (srcmem, QImode, *srcptr);
24374 emit_move_insn (destmem, srcmem);
24377 /* Handle sizes 2 and 3. */
24378 label = ix86_expand_aligntest (*count, 2, false);
24379 destmem = change_address (destmem, HImode, *destptr);
24380 destmem = offset_address (destmem, *count, 1);
24381 destmem = offset_address (destmem, GEN_INT (-2), 2);
24382 if (issetmem)
24383 emit_move_insn (destmem, gen_lowpart (HImode, value));
24384 else
24386 srcmem = change_address (srcmem, HImode, *srcptr);
24387 srcmem = offset_address (srcmem, *count, 1);
24388 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24389 emit_move_insn (destmem, srcmem);
24392 emit_label (label);
24393 LABEL_NUSES (label) = 1;
24394 emit_jump_insn (gen_jump (*done_label));
24395 emit_barrier ();
24397 else
24398 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24399 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24401 /* Start memcpy for COUNT >= SIZE. */
24402 if (loop_label)
24404 emit_label (loop_label);
24405 LABEL_NUSES (loop_label) = 1;
24408 /* Copy first desired_align bytes. */
24409 if (!issetmem)
24410 srcmem = change_address (srcmem, mode, *srcptr);
24411 destmem = change_address (destmem, mode, *destptr);
24412 modesize = GEN_INT (GET_MODE_SIZE (mode));
24413 for (n = 0; prolog_size < desired_align - align; n++)
24415 if (issetmem)
24416 emit_move_insn (destmem, mode_value);
24417 else
24419 emit_move_insn (destmem, srcmem);
24420 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24422 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24423 prolog_size += GET_MODE_SIZE (mode);
24427 /* Copy last SIZE bytes. */
24428 destmem = offset_address (destmem, *count, 1);
24429 destmem = offset_address (destmem,
24430 GEN_INT (-size - prolog_size),
24432 if (issetmem)
24433 emit_move_insn (destmem, mode_value);
24434 else
24436 srcmem = offset_address (srcmem, *count, 1);
24437 srcmem = offset_address (srcmem,
24438 GEN_INT (-size - prolog_size),
24440 emit_move_insn (destmem, srcmem);
24442 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24444 destmem = offset_address (destmem, modesize, 1);
24445 if (issetmem)
24446 emit_move_insn (destmem, mode_value);
24447 else
24449 srcmem = offset_address (srcmem, modesize, 1);
24450 emit_move_insn (destmem, srcmem);
24454 /* Align destination. */
24455 if (desired_align > 1 && desired_align > align)
24457 rtx saveddest = *destptr;
24459 gcc_assert (desired_align <= size);
24460 /* Align destptr up, place it to new register. */
24461 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24462 GEN_INT (prolog_size),
24463 NULL_RTX, 1, OPTAB_DIRECT);
24464 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24465 REG_POINTER (*destptr) = 1;
24466 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24467 GEN_INT (-desired_align),
24468 *destptr, 1, OPTAB_DIRECT);
24469 /* See how many bytes we skipped. */
24470 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24471 *destptr,
24472 saveddest, 1, OPTAB_DIRECT);
24473 /* Adjust srcptr and count. */
24474 if (!issetmem)
24475 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24476 saveddest, *srcptr, 1, OPTAB_DIRECT);
24477 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24478 saveddest, *count, 1, OPTAB_DIRECT);
24479 /* We copied at most size + prolog_size. */
24480 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24481 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24482 else
24483 *min_size = 0;
24485 /* Our loops always round down the block size, but for dispatch to a library
24486 call we need the precise value. */
24487 if (dynamic_check)
24488 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24489 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24491 else
24493 gcc_assert (prolog_size == 0);
24494 /* Decrease count, so we won't end up copying last word twice. */
24495 if (!CONST_INT_P (*count))
24496 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24497 constm1_rtx, *count, 1, OPTAB_DIRECT);
24498 else
24499 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24500 if (*min_size)
24501 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24506 /* This function is like the previous one, except here we know how many bytes
24507 need to be copied. That allows us to update alignment not only of DST, which
24508 is returned, but also of SRC, which is passed as a pointer for that
24509 reason. */
24510 static rtx
24511 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24512 rtx srcreg, rtx value, rtx vec_value,
24513 int desired_align, int align_bytes,
24514 bool issetmem)
24516 rtx src = NULL;
24517 rtx orig_dst = dst;
24518 rtx orig_src = NULL;
24519 int piece_size = 1;
24520 int copied_bytes = 0;
24522 if (!issetmem)
24524 gcc_assert (srcp != NULL);
24525 src = *srcp;
24526 orig_src = src;
24529 for (piece_size = 1;
24530 piece_size <= desired_align && copied_bytes < align_bytes;
24531 piece_size <<= 1)
24533 if (align_bytes & piece_size)
24535 if (issetmem)
24537 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24538 dst = emit_memset (dst, destreg, vec_value, piece_size);
24539 else
24540 dst = emit_memset (dst, destreg, value, piece_size);
24542 else
24543 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24544 copied_bytes += piece_size;
24547 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24548 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24549 if (MEM_SIZE_KNOWN_P (orig_dst))
24550 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24552 if (!issetmem)
24554 int src_align_bytes = get_mem_align_offset (src, desired_align
24555 * BITS_PER_UNIT);
24556 if (src_align_bytes >= 0)
24557 src_align_bytes = desired_align - src_align_bytes;
24558 if (src_align_bytes >= 0)
24560 unsigned int src_align;
24561 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24563 if ((src_align_bytes & (src_align - 1))
24564 == (align_bytes & (src_align - 1)))
24565 break;
24567 if (src_align > (unsigned int) desired_align)
24568 src_align = desired_align;
24569 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24570 set_mem_align (src, src_align * BITS_PER_UNIT);
24572 if (MEM_SIZE_KNOWN_P (orig_src))
24573 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24574 *srcp = src;
24577 return dst;
24580 /* Return true if ALG can be used in current context.
24581 Assume we expand memset if MEMSET is true. */
24582 static bool
24583 alg_usable_p (enum stringop_alg alg, bool memset)
24585 if (alg == no_stringop)
24586 return false;
24587 if (alg == vector_loop)
24588 return TARGET_SSE || TARGET_AVX;
24589 /* Algorithms using the rep prefix want at least edi and ecx;
24590 additionally, memset wants eax and memcpy wants esi. Don't
24591 consider such algorithms if the user has appropriated those
24592 registers for their own purposes. */
24593 if (alg == rep_prefix_1_byte
24594 || alg == rep_prefix_4_byte
24595 || alg == rep_prefix_8_byte)
24596 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24597 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24598 return true;
24601 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24602 static enum stringop_alg
24603 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24604 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24605 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24607 const struct stringop_algs * algs;
24608 bool optimize_for_speed;
24609 int max = 0;
24610 const struct processor_costs *cost;
24611 int i;
24612 bool any_alg_usable_p = false;
24614 *noalign = false;
24615 *dynamic_check = -1;
24617 /* Even if the string operation call is cold, we still might spend a lot
24618 of time processing large blocks. */
24619 if (optimize_function_for_size_p (cfun)
24620 || (optimize_insn_for_size_p ()
24621 && (max_size < 256
24622 || (expected_size != -1 && expected_size < 256))))
24623 optimize_for_speed = false;
24624 else
24625 optimize_for_speed = true;
24627 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24628 if (memset)
24629 algs = &cost->memset[TARGET_64BIT != 0];
24630 else
24631 algs = &cost->memcpy[TARGET_64BIT != 0];
24633 /* See the maximal size for which a user-defined algorithm is usable. */
24634 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24636 enum stringop_alg candidate = algs->size[i].alg;
24637 bool usable = alg_usable_p (candidate, memset);
24638 any_alg_usable_p |= usable;
24640 if (candidate != libcall && candidate && usable)
24641 max = algs->size[i].max;
24644 /* If the expected size is not known but the max size is small enough
24645 that the inline version is a win, set the expected size into
24646 the range. */
24647 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24648 && expected_size == -1)
24649 expected_size = min_size / 2 + max_size / 2;
24651 /* If the user specified the algorithm, honor it if possible. */
24652 if (ix86_stringop_alg != no_stringop
24653 && alg_usable_p (ix86_stringop_alg, memset))
24654 return ix86_stringop_alg;
24655 /* rep; movq or rep; movl is the smallest variant. */
24656 else if (!optimize_for_speed)
24658 *noalign = true;
24659 if (!count || (count & 3) || (memset && !zero_memset))
24660 return alg_usable_p (rep_prefix_1_byte, memset)
24661 ? rep_prefix_1_byte : loop_1_byte;
24662 else
24663 return alg_usable_p (rep_prefix_4_byte, memset)
24664 ? rep_prefix_4_byte : loop;
24666 /* Very tiny blocks are best handled via the loop, since REP is expensive to
24667 set up. */
24668 else if (expected_size != -1 && expected_size < 4)
24669 return loop_1_byte;
24670 else if (expected_size != -1)
24672 enum stringop_alg alg = libcall;
24673 bool alg_noalign = false;
24674 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24676 /* We get here if the algorithms that were not libcall-based
24677 were rep-prefix based and we are unable to use rep prefixes
24678 based on global register usage. Break out of the loop and
24679 use the heuristic below. */
24680 if (algs->size[i].max == 0)
24681 break;
24682 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24684 enum stringop_alg candidate = algs->size[i].alg;
24686 if (candidate != libcall && alg_usable_p (candidate, memset))
24688 alg = candidate;
24689 alg_noalign = algs->size[i].noalign;
24691 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24692 the last non-libcall inline algorithm. */
24693 if (TARGET_INLINE_ALL_STRINGOPS)
24695 /* When the current size is best copied by a libcall,
24696 but we are still forced to inline, run the heuristic below
24697 that will pick code for medium-sized blocks. */
24698 if (alg != libcall)
24700 *noalign = alg_noalign;
24701 return alg;
24703 else if (!any_alg_usable_p)
24704 break;
24706 else if (alg_usable_p (candidate, memset))
24708 *noalign = algs->size[i].noalign;
24709 return candidate;
24714 /* When asked to inline the call anyway, try to pick a meaningful choice.
24715 We look for the maximal size of a block that is faster to copy by hand and
24716 take blocks of at most that size, guessing that the average size will
24717 be roughly half of that maximum.
24719 If this turns out to be bad, we might simply specify the preferred
24720 choice in ix86_costs. */
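/* For example, if the cost tables say inline copying wins for blocks of up
   to MAX bytes, the recursion below assumes an expected size of MAX / 2
   (MAX defaults to 4096 when no table entry provides it). */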
24721 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24722 && (algs->unknown_size == libcall
24723 || !alg_usable_p (algs->unknown_size, memset)))
24725 enum stringop_alg alg;
24727 /* If there aren't any usable algorithms, then recursing on
24728 smaller sizes isn't going to find anything. Just return the
24729 simple byte-at-a-time copy loop. */
24730 if (!any_alg_usable_p)
24732 /* Pick something reasonable. */
24733 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24734 *dynamic_check = 128;
24735 return loop_1_byte;
24737 if (max <= 0)
24738 max = 4096;
24739 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24740 zero_memset, dynamic_check, noalign);
24741 gcc_assert (*dynamic_check == -1);
24742 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24743 *dynamic_check = max;
24744 else
24745 gcc_assert (alg != libcall);
24746 return alg;
24748 return (alg_usable_p (algs->unknown_size, memset)
24749 ? algs->unknown_size : libcall);
24752 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24753 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24754 static int
24755 decide_alignment (int align,
24756 enum stringop_alg alg,
24757 int expected_size,
24758 machine_mode move_mode)
24760 int desired_align = 0;
24762 gcc_assert (alg != no_stringop);
24764 if (alg == libcall)
24765 return 0;
24766 if (move_mode == VOIDmode)
24767 return 0;
24769 desired_align = GET_MODE_SIZE (move_mode);
24770 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24771 copying a whole cacheline at once. */
24772 if (TARGET_PENTIUMPRO
24773 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24774 desired_align = 8;
24776 if (optimize_size)
24777 desired_align = 1;
24778 if (desired_align < align)
24779 desired_align = align;
24780 if (expected_size != -1 && expected_size < 4)
24781 desired_align = align;
24783 return desired_align;
24787 /* Helper function for memset expansion. For a QImode value 0xXY produce
24788 0xXYXYXYXY of the width specified by MODE. This is essentially
24789 a * 0x01010101, but we can do slightly better than
24790 synth_mult by unwinding the sequence by hand on CPUs with
24791 slow multiply. */
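/* For example, a QImode value of 0xab is expanded to 0xabababab for SImode
   and to 0xabababababababab for DImode. */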
24792 static rtx
24793 promote_duplicated_reg (machine_mode mode, rtx val)
24795 machine_mode valmode = GET_MODE (val);
24796 rtx tmp;
24797 int nops = mode == DImode ? 3 : 2;
24799 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24800 if (val == const0_rtx)
24801 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24802 if (CONST_INT_P (val))
24804 HOST_WIDE_INT v = INTVAL (val) & 255;
24806 v |= v << 8;
24807 v |= v << 16;
24808 if (mode == DImode)
24809 v |= (v << 16) << 16;
24810 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24813 if (valmode == VOIDmode)
24814 valmode = QImode;
24815 if (valmode != QImode)
24816 val = gen_lowpart (QImode, val);
24817 if (mode == QImode)
24818 return val;
24819 if (!TARGET_PARTIAL_REG_STALL)
24820 nops--;
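/* Use the multiply by 0x01010101 (0x0101010101010101 for DImode) only when
   its cost does not exceed the shift/or sequence emitted in the else branch
   below. */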
24821 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24822 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24823 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24824 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24826 rtx reg = convert_modes (mode, QImode, val, true);
24827 tmp = promote_duplicated_reg (mode, const1_rtx);
24828 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24829 OPTAB_DIRECT);
24831 else
24833 rtx reg = convert_modes (mode, QImode, val, true);
24835 if (!TARGET_PARTIAL_REG_STALL)
24836 if (mode == SImode)
24837 emit_insn (gen_movsi_insv_1 (reg, reg));
24838 else
24839 emit_insn (gen_movdi_insv_1 (reg, reg));
24840 else
24842 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24843 NULL, 1, OPTAB_DIRECT);
24844 reg =
24845 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24847 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24848 NULL, 1, OPTAB_DIRECT);
24849 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24850 if (mode == SImode)
24851 return reg;
24852 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24853 NULL, 1, OPTAB_DIRECT);
24854 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24855 return reg;
24859 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24860 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
24861 alignment from ALIGN to DESIRED_ALIGN. */
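/* For example, with ALIGN == DESIRED_ALIGN, SIZE_NEEDED == 8 on a 64-bit
   target promotes VAL to DImode, while SIZE_NEEDED == 4 promotes it only to
   SImode. */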
24862 static rtx
24863 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24864 int align)
24866 rtx promoted_val;
24868 if (TARGET_64BIT
24869 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24870 promoted_val = promote_duplicated_reg (DImode, val);
24871 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24872 promoted_val = promote_duplicated_reg (SImode, val);
24873 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24874 promoted_val = promote_duplicated_reg (HImode, val);
24875 else
24876 promoted_val = val;
24878 return promoted_val;
24881 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24882 operations when profitable. The code depends upon architecture, block size
24883 and alignment, but always has one of the following overall structures:
24885 Aligned move sequence:
24887 1) Prologue guard: Conditional that jumps up to epilogues for small
24888 blocks that can be handled by the epilogue alone. This is faster
24889 but also needed for correctness, since the prologue assumes the block
24890 is larger than the desired alignment.
24892 Optional dynamic check for size and libcall for large
24893 blocks is emitted here too, with -minline-stringops-dynamically.
24895 2) Prologue: copy first few bytes in order to get destination
24896 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24897 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24898 copied. We emit either a jump tree on power of two sized
24899 blocks, or a byte loop.
24901 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24902 with specified algorithm.
24904 4) Epilogue: code copying tail of the block that is too small to be
24905 handled by main body (or up to size guarded by prologue guard).
24907 Misaligned move sequence
24909 1) Misaligned move prologue/epilogue containing:
24910 a) Prologue handling small memory blocks and jumping to done_label
24911 (skipped if blocks are known to be large enough)
24912 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes, if alignment is
24913 needed, by a single possibly misaligned move
24914 (skipped if alignment is not needed)
24915 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24917 2) Zero size guard dispatching to done_label, if needed
24919 3) Dispatch to library call, if needed.
24921 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24922 with specified algorithm. */
24923 bool
24924 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24925 rtx align_exp, rtx expected_align_exp,
24926 rtx expected_size_exp, rtx min_size_exp,
24927 rtx max_size_exp, rtx probable_max_size_exp,
24928 bool issetmem)
24930 rtx destreg;
24931 rtx srcreg = NULL;
24932 rtx_code_label *label = NULL;
24933 rtx tmp;
24934 rtx_code_label *jump_around_label = NULL;
24935 HOST_WIDE_INT align = 1;
24936 unsigned HOST_WIDE_INT count = 0;
24937 HOST_WIDE_INT expected_size = -1;
24938 int size_needed = 0, epilogue_size_needed;
24939 int desired_align = 0, align_bytes = 0;
24940 enum stringop_alg alg;
24941 rtx promoted_val = NULL;
24942 rtx vec_promoted_val = NULL;
24943 bool force_loopy_epilogue = false;
24944 int dynamic_check;
24945 bool need_zero_guard = false;
24946 bool noalign;
24947 machine_mode move_mode = VOIDmode;
24948 int unroll_factor = 1;
24949 /* TODO: Once value ranges are available, fill in proper data. */
24950 unsigned HOST_WIDE_INT min_size = 0;
24951 unsigned HOST_WIDE_INT max_size = -1;
24952 unsigned HOST_WIDE_INT probable_max_size = -1;
24953 bool misaligned_prologue_used = false;
24955 if (CONST_INT_P (align_exp))
24956 align = INTVAL (align_exp);
24957 /* i386 can do misaligned accesses at a reasonably increased cost. */
24958 if (CONST_INT_P (expected_align_exp)
24959 && INTVAL (expected_align_exp) > align)
24960 align = INTVAL (expected_align_exp);
24961 /* ALIGN is the minimum of destination and source alignment, but we care here
24962 just about destination alignment. */
24963 else if (!issetmem
24964 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24965 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24967 if (CONST_INT_P (count_exp))
24969 min_size = max_size = probable_max_size = count = expected_size
24970 = INTVAL (count_exp);
24971 /* When COUNT is 0, there is nothing to do. */
24972 if (!count)
24973 return true;
24975 else
24977 if (min_size_exp)
24978 min_size = INTVAL (min_size_exp);
24979 if (max_size_exp)
24980 max_size = INTVAL (max_size_exp);
24981 if (probable_max_size_exp)
24982 probable_max_size = INTVAL (probable_max_size_exp);
24983 if (CONST_INT_P (expected_size_exp))
24984 expected_size = INTVAL (expected_size_exp);
24987 /* Make sure we don't need to care about overflow later on. */
24988 if (count > (HOST_WIDE_INT_1U << 30))
24989 return false;
24991 /* Step 0: Decide on preferred algorithm, desired alignment and
24992 size of chunks to be copied by main loop. */
24993 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24994 issetmem,
24995 issetmem && val_exp == const0_rtx,
24996 &dynamic_check, &noalign);
24997 if (alg == libcall)
24998 return false;
24999 gcc_assert (alg != no_stringop);
25001 /* For now the vector version of memset is generated only for memory zeroing, as
25002 creating the promoted vector value is very cheap in this case. */
25003 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
25004 alg = unrolled_loop;
25006 if (!count)
25007 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
25008 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
25009 if (!issetmem)
25010 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
25012 unroll_factor = 1;
25013 move_mode = word_mode;
25014 switch (alg)
25016 case libcall:
25017 case no_stringop:
25018 case last_alg:
25019 gcc_unreachable ();
25020 case loop_1_byte:
25021 need_zero_guard = true;
25022 move_mode = QImode;
25023 break;
25024 case loop:
25025 need_zero_guard = true;
25026 break;
25027 case unrolled_loop:
25028 need_zero_guard = true;
25029 unroll_factor = (TARGET_64BIT ? 4 : 2);
25030 break;
25031 case vector_loop:
25032 need_zero_guard = true;
25033 unroll_factor = 4;
25034 /* Find the widest supported mode. */
25035 move_mode = word_mode;
25036 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
25037 != CODE_FOR_nothing)
25038 move_mode = GET_MODE_WIDER_MODE (move_mode);
25040 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25041 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25042 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25044 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25045 move_mode = mode_for_vector (word_mode, nunits);
25046 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
25047 move_mode = word_mode;
25049 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
25050 break;
25051 case rep_prefix_8_byte:
25052 move_mode = DImode;
25053 break;
25054 case rep_prefix_4_byte:
25055 move_mode = SImode;
25056 break;
25057 case rep_prefix_1_byte:
25058 move_mode = QImode;
25059 break;
25061 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
25062 epilogue_size_needed = size_needed;
25064 desired_align = decide_alignment (align, alg, expected_size, move_mode);
25065 if (!TARGET_ALIGN_STRINGOPS || noalign)
25066 align = desired_align;
25068 /* Step 1: Prologue guard. */
25070 /* Alignment code needs count to be in register. */
25071 if (CONST_INT_P (count_exp) && desired_align > align)
25073 if (INTVAL (count_exp) > desired_align
25074 && INTVAL (count_exp) > size_needed)
25076 align_bytes
25077 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
25078 if (align_bytes <= 0)
25079 align_bytes = 0;
25080 else
25081 align_bytes = desired_align - align_bytes;
25083 if (align_bytes == 0)
25084 count_exp = force_reg (counter_mode (count_exp), count_exp);
25086 gcc_assert (desired_align >= 1 && align >= 1);
25088 /* Misaligned move sequences handle both prologue and epilogue at once.
25089 Default code generation results in smaller code for large alignments
25090 and also avoids redundant work when sizes are known precisely. */
25091 misaligned_prologue_used
25092 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
25093 && MAX (desired_align, epilogue_size_needed) <= 32
25094 && desired_align <= epilogue_size_needed
25095 && ((desired_align > align && !align_bytes)
25096 || (!count && epilogue_size_needed > 1)));
25098 /* Do the cheap promotion to allow better CSE across the
25099 main loop and epilogue (i.e. one load of the big constant in
25100 front of all the code).
25101 For now the misaligned move sequences do not have a fast path
25102 without broadcasting. */
25103 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25105 if (alg == vector_loop)
25107 gcc_assert (val_exp == const0_rtx);
25108 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25109 promoted_val = promote_duplicated_reg_to_size (val_exp,
25110 GET_MODE_SIZE (word_mode),
25111 desired_align, align);
25113 else
25115 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25116 desired_align, align);
25119 /* Misaligned move sequences handle both prologues and epilogues at once.
25120 Default code generation results in smaller code for large alignments and
25121 also avoids redundant work when sizes are known precisely. */
25122 if (misaligned_prologue_used)
25124 /* The misaligned move prologue handles small blocks by itself. */
25125 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25126 (dst, src, &destreg, &srcreg,
25127 move_mode, promoted_val, vec_promoted_val,
25128 &count_exp,
25129 &jump_around_label,
25130 desired_align < align
25131 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25132 desired_align, align, &min_size, dynamic_check, issetmem);
25133 if (!issetmem)
25134 src = change_address (src, BLKmode, srcreg);
25135 dst = change_address (dst, BLKmode, destreg);
25136 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25137 epilogue_size_needed = 0;
25138 if (need_zero_guard && !min_size)
25140 /* It is possible that we copied enough so the main loop will not
25141 execute. */
25142 gcc_assert (size_needed > 1);
25143 if (jump_around_label == NULL_RTX)
25144 jump_around_label = gen_label_rtx ();
25145 emit_cmp_and_jump_insns (count_exp,
25146 GEN_INT (size_needed),
25147 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25148 if (expected_size == -1
25149 || expected_size < (desired_align - align) / 2 + size_needed)
25150 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25151 else
25152 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25155 /* Ensure that alignment prologue won't copy past end of block. */
25156 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25158 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25159 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25160 Make sure it is a power of 2. */
25161 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
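/* E.g. with SIZE_NEEDED == 16 and ALIGN == DESIRED_ALIGN this computes
   1 << (floor_log2 (15) + 1) == 16. */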
25163 /* To improve performance on small blocks, we jump around the VAL
25164 promoting code. This means that if the promoted VAL is not constant,
25165 we might not use it in the epilogue and have to use the byte
25166 loop variant. */
25167 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25168 force_loopy_epilogue = true;
25169 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25170 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25172 /* If main algorithm works on QImode, no epilogue is needed.
25173 For small sizes just don't align anything. */
25174 if (size_needed == 1)
25175 desired_align = align;
25176 else
25177 goto epilogue;
25179 else if (!count
25180 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25182 label = gen_label_rtx ();
25183 emit_cmp_and_jump_insns (count_exp,
25184 GEN_INT (epilogue_size_needed),
25185 LTU, 0, counter_mode (count_exp), 1, label);
25186 if (expected_size == -1 || expected_size < epilogue_size_needed)
25187 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25188 else
25189 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25193 /* Emit code to decide at runtime whether a library call or inline code should be
25194 used. */
25195 if (dynamic_check != -1)
25197 if (!issetmem && CONST_INT_P (count_exp))
25199 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25201 emit_block_move_via_libcall (dst, src, count_exp, false);
25202 count_exp = const0_rtx;
25203 goto epilogue;
25206 else
25208 rtx_code_label *hot_label = gen_label_rtx ();
25209 if (jump_around_label == NULL_RTX)
25210 jump_around_label = gen_label_rtx ();
25211 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25212 LEU, 0, counter_mode (count_exp),
25213 1, hot_label);
25214 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25215 if (issetmem)
25216 set_storage_via_libcall (dst, count_exp, val_exp, false);
25217 else
25218 emit_block_move_via_libcall (dst, src, count_exp, false);
25219 emit_jump (jump_around_label);
25220 emit_label (hot_label);
25224 /* Step 2: Alignment prologue. */
25225 /* Do the expensive promotion once we branched off the small blocks. */
25226 if (issetmem && !promoted_val)
25227 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25228 desired_align, align);
25230 if (desired_align > align && !misaligned_prologue_used)
25232 if (align_bytes == 0)
25234 /* Except for the first move in the prologue, we no longer know
25235 the constant offset in aliasing info. It doesn't seem worth
25236 the pain to maintain it for the first move, so throw away
25237 the info early. */
25238 dst = change_address (dst, BLKmode, destreg);
25239 if (!issetmem)
25240 src = change_address (src, BLKmode, srcreg);
25241 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25242 promoted_val, vec_promoted_val,
25243 count_exp, align, desired_align,
25244 issetmem);
25245 /* At most desired_align - align bytes are copied. */
25246 if (min_size < (unsigned)(desired_align - align))
25247 min_size = 0;
25248 else
25249 min_size -= desired_align - align;
25251 else
25253 /* If we know how many bytes need to be stored before dst is
25254 sufficiently aligned, maintain aliasing info accurately. */
25255 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25256 srcreg,
25257 promoted_val,
25258 vec_promoted_val,
25259 desired_align,
25260 align_bytes,
25261 issetmem);
25263 count_exp = plus_constant (counter_mode (count_exp),
25264 count_exp, -align_bytes);
25265 count -= align_bytes;
25266 min_size -= align_bytes;
25267 max_size -= align_bytes;
25269 if (need_zero_guard
25270 && !min_size
25271 && (count < (unsigned HOST_WIDE_INT) size_needed
25272 || (align_bytes == 0
25273 && count < ((unsigned HOST_WIDE_INT) size_needed
25274 + desired_align - align))))
25276 /* It is possible that we copied enough so the main loop will not
25277 execute. */
25278 gcc_assert (size_needed > 1);
25279 if (label == NULL_RTX)
25280 label = gen_label_rtx ();
25281 emit_cmp_and_jump_insns (count_exp,
25282 GEN_INT (size_needed),
25283 LTU, 0, counter_mode (count_exp), 1, label);
25284 if (expected_size == -1
25285 || expected_size < (desired_align - align) / 2 + size_needed)
25286 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25287 else
25288 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25291 if (label && size_needed == 1)
25293 emit_label (label);
25294 LABEL_NUSES (label) = 1;
25295 label = NULL;
25296 epilogue_size_needed = 1;
25297 if (issetmem)
25298 promoted_val = val_exp;
25300 else if (label == NULL_RTX && !misaligned_prologue_used)
25301 epilogue_size_needed = size_needed;
25303 /* Step 3: Main loop. */
25305 switch (alg)
25307 case libcall:
25308 case no_stringop:
25309 case last_alg:
25310 gcc_unreachable ();
25311 case loop_1_byte:
25312 case loop:
25313 case unrolled_loop:
25314 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25315 count_exp, move_mode, unroll_factor,
25316 expected_size, issetmem);
25317 break;
25318 case vector_loop:
25319 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25320 vec_promoted_val, count_exp, move_mode,
25321 unroll_factor, expected_size, issetmem);
25322 break;
25323 case rep_prefix_8_byte:
25324 case rep_prefix_4_byte:
25325 case rep_prefix_1_byte:
25326 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25327 val_exp, count_exp, move_mode, issetmem);
25328 break;
25330 /* Properly adjust the offsets of the src and dest memory for aliasing. */
25331 if (CONST_INT_P (count_exp))
25333 if (!issetmem)
25334 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25335 (count / size_needed) * size_needed);
25336 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25337 (count / size_needed) * size_needed);
25339 else
25341 if (!issetmem)
25342 src = change_address (src, BLKmode, srcreg);
25343 dst = change_address (dst, BLKmode, destreg);
25346 /* Step 4: Epilogue to copy the remaining bytes. */
25347 epilogue:
25348 if (label)
25350 /* When the main loop is done, COUNT_EXP might hold original count,
25351 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25352 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25353 bytes. Compensate if needed. */
25355 if (size_needed < epilogue_size_needed)
25357 tmp =
25358 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25359 GEN_INT (size_needed - 1), count_exp, 1,
25360 OPTAB_DIRECT);
25361 if (tmp != count_exp)
25362 emit_move_insn (count_exp, tmp);
25364 emit_label (label);
25365 LABEL_NUSES (label) = 1;
25368 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25370 if (force_loopy_epilogue)
25371 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25372 epilogue_size_needed);
25373 else
25375 if (issetmem)
25376 expand_setmem_epilogue (dst, destreg, promoted_val,
25377 vec_promoted_val, count_exp,
25378 epilogue_size_needed);
25379 else
25380 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25381 epilogue_size_needed);
25384 if (jump_around_label)
25385 emit_label (jump_around_label);
25386 return true;
25390 /* Expand the appropriate insns for doing strlen if not just doing
25391 repnz; scasb
25393 out = result, initialized with the start address
25394 align_rtx = alignment of the address.
25395 scratch = scratch register, initialized with the start address when
25396 not aligned, otherwise undefined
25398 This is just the body. It needs the initializations mentioned above and
25399 some address computing at the end. These things are done in i386.md. */
25401 static void
25402 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25404 int align;
25405 rtx tmp;
25406 rtx_code_label *align_2_label = NULL;
25407 rtx_code_label *align_3_label = NULL;
25408 rtx_code_label *align_4_label = gen_label_rtx ();
25409 rtx_code_label *end_0_label = gen_label_rtx ();
25410 rtx mem;
25411 rtx tmpreg = gen_reg_rtx (SImode);
25412 rtx scratch = gen_reg_rtx (SImode);
25413 rtx cmp;
25415 align = 0;
25416 if (CONST_INT_P (align_rtx))
25417 align = INTVAL (align_rtx);
25419 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25421 /* Is there a known alignment and is it less than 4? */
25422 if (align < 4)
25424 rtx scratch1 = gen_reg_rtx (Pmode);
25425 emit_move_insn (scratch1, out);
25426 /* Is there a known alignment and is it not 2? */
25427 if (align != 2)
25429 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25430 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25432 /* Leave just the 3 lower bits. */
25433 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25434 NULL_RTX, 0, OPTAB_WIDEN);
25436 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25437 Pmode, 1, align_4_label);
25438 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25439 Pmode, 1, align_2_label);
25440 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25441 Pmode, 1, align_3_label);
25443 else
25445 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25446 check whether it is aligned to a 4-byte boundary. */
25448 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25449 NULL_RTX, 0, OPTAB_WIDEN);
25451 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25452 Pmode, 1, align_4_label);
25455 mem = change_address (src, QImode, out);
25457 /* Now compare the bytes. */
25459 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25460 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25461 QImode, 1, end_0_label);
25463 /* Increment the address. */
25464 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25466 /* Not needed with an alignment of 2 */
25467 if (align != 2)
25469 emit_label (align_2_label);
25471 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25472 end_0_label);
25474 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25476 emit_label (align_3_label);
25479 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25480 end_0_label);
25482 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25485 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25486 align this loop; doing so only bloats the program and does not help
25487 speed. */
25488 emit_label (align_4_label);
25490 mem = change_address (src, SImode, out);
25491 emit_move_insn (scratch, mem);
25492 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25494 /* This formula yields a nonzero result iff one of the bytes is zero.
25495 This saves three branches inside the loop and many cycles. */
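/* In scalar terms the insns below compute
   (scratch - 0x01010101) & ~scratch & 0x80808080,
   which is nonzero exactly when some byte of SCRATCH is zero. */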
25497 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25498 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25499 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25500 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25501 gen_int_mode (0x80808080, SImode)));
25502 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25503 align_4_label);
25505 if (TARGET_CMOVE)
25507 rtx reg = gen_reg_rtx (SImode);
25508 rtx reg2 = gen_reg_rtx (Pmode);
25509 emit_move_insn (reg, tmpreg);
25510 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25512 /* If zero is not in the first two bytes, move two bytes forward. */
25513 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25514 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25515 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25516 emit_insn (gen_rtx_SET (tmpreg,
25517 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25518 reg,
25519 tmpreg)));
25520 /* Emit lea manually to avoid clobbering of flags. */
25521 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
25523 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25524 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25525 emit_insn (gen_rtx_SET (out,
25526 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25527 reg2,
25528 out)));
25530 else
25532 rtx_code_label *end_2_label = gen_label_rtx ();
25533 /* Is zero in the first two bytes? */
25535 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25536 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25537 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25538 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25539 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25540 pc_rtx);
25541 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
25542 JUMP_LABEL (tmp) = end_2_label;
25544 /* Not in the first two. Move two bytes forward. */
25545 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25546 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25548 emit_label (end_2_label);
25552 /* Avoid branch in fixing the byte. */
25553 tmpreg = gen_lowpart (QImode, tmpreg);
25554 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25555 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25556 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25557 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25559 emit_label (end_0_label);
25562 /* Expand strlen. */
25564 bool
25565 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25567 rtx addr, scratch1, scratch2, scratch3, scratch4;
25569 /* The generic case of the strlen expander is long. Avoid
25570 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
25572 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25573 && !TARGET_INLINE_ALL_STRINGOPS
25574 && !optimize_insn_for_size_p ()
25575 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25576 return false;
25578 addr = force_reg (Pmode, XEXP (src, 0));
25579 scratch1 = gen_reg_rtx (Pmode);
25581 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25582 && !optimize_insn_for_size_p ())
25584 /* Well it seems that some optimizer does not combine a call like
25585 foo(strlen(bar), strlen(bar));
25586 when the move and the subtraction are done here. It does calculate
25587 the length just once when these instructions are done inside of
25588 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25589 often used and I use one fewer register for the lifetime of
25590 output_strlen_unroll() this is better. */
25592 emit_move_insn (out, addr);
25594 ix86_expand_strlensi_unroll_1 (out, src, align);
25596 /* strlensi_unroll_1 returns the address of the zero at the end of
25597 the string, like memchr(), so compute the length by subtracting
25598 the start address. */
25599 emit_insn (ix86_gen_sub3 (out, out, addr));
25601 else
25603 rtx unspec;
25605 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25606 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25607 return false;
25609 scratch2 = gen_reg_rtx (Pmode);
25610 scratch3 = gen_reg_rtx (Pmode);
25611 scratch4 = force_reg (Pmode, constm1_rtx);
25613 emit_move_insn (scratch3, addr);
25614 eoschar = force_reg (QImode, eoschar);
25616 src = replace_equiv_address_nv (src, scratch3);
25618 /* If .md starts supporting :P, this can be done in .md. */
25619 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25620 scratch4), UNSPEC_SCAS);
25621 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25622 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25623 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25625 return true;
25628 /* For a given symbol (function), construct code to compute the address of its PLT
25629 entry in the large x86-64 PIC model. */
25630 static rtx
25631 construct_plt_address (rtx symbol)
25633 rtx tmp, unspec;
25635 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25636 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25637 gcc_assert (Pmode == DImode);
25639 tmp = gen_reg_rtx (Pmode);
25640 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25642 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25643 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25644 return tmp;
25648 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25649 rtx callarg2,
25650 rtx pop, bool sibcall)
25652 rtx vec[3];
25653 rtx use = NULL, call;
25654 unsigned int vec_len = 0;
25656 if (pop == const0_rtx)
25657 pop = NULL;
25658 gcc_assert (!TARGET_64BIT || !pop);
25660 if (TARGET_MACHO && !TARGET_64BIT)
25662 #if TARGET_MACHO
25663 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25664 fnaddr = machopic_indirect_call_target (fnaddr);
25665 #endif
25667 else
25669 /* Static functions and indirect calls don't need the pic register. Also,
25670 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
25671 it an indirect call. */
25672 if (flag_pic
25673 && (!TARGET_64BIT
25674 || (ix86_cmodel == CM_LARGE_PIC
25675 && DEFAULT_ABI != MS_ABI))
25676 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25677 && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
25678 && flag_plt
25679 && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
25680 || !lookup_attribute ("noplt",
25681 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
25683 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25684 if (ix86_use_pseudo_pic_reg ())
25685 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25686 pic_offset_table_rtx);
25690 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25691 parameters passed in vector registers. */
25692 if (TARGET_64BIT
25693 && (INTVAL (callarg2) > 0
25694 || (INTVAL (callarg2) == 0
25695 && (TARGET_SSE || !flag_skip_rax_setup))))
25697 rtx al = gen_rtx_REG (QImode, AX_REG);
25698 emit_move_insn (al, callarg2);
25699 use_reg (&use, al);
25702 if (ix86_cmodel == CM_LARGE_PIC
25703 && !TARGET_PECOFF
25704 && MEM_P (fnaddr)
25705 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25706 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25707 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25708 else if (sibcall
25709 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25710 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25712 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25713 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25716 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25718 if (retval)
25720 /* We should add bounds as destination register in case
25721 pointer with bounds may be returned. */
25722 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25724 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25725 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25726 if (GET_CODE (retval) == PARALLEL)
25728 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25729 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25730 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25731 retval = chkp_join_splitted_slot (retval, par);
25733 else
25735 retval = gen_rtx_PARALLEL (VOIDmode,
25736 gen_rtvec (3, retval, b0, b1));
25737 chkp_put_regs_to_expr_list (retval);
25741 call = gen_rtx_SET (retval, call);
25743 vec[vec_len++] = call;
25745 if (pop)
25747 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25748 pop = gen_rtx_SET (stack_pointer_rtx, pop);
25749 vec[vec_len++] = pop;
25752 if (TARGET_64BIT_MS_ABI
25753 && (!callarg2 || INTVAL (callarg2) != -2))
25755 int const cregs_size
25756 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25757 int i;
25759 for (i = 0; i < cregs_size; i++)
25761 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25762 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25764 clobber_reg (&use, gen_rtx_REG (mode, regno));
25768 if (vec_len > 1)
25769 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25770 call = emit_call_insn (call);
25771 if (use)
25772 CALL_INSN_FUNCTION_USAGE (call) = use;
25774 return call;
25777 /* Return true if the function being called was marked with attribute "noplt"
25778 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
25779 handle the non-PIC case in the backend because there is no easy interface
25780 for the front-end to force non-PLT calls to use the GOT. This is currently
25781 used only with 64-bit ELF targets to call the function marked "noplt"
25782 indirectly. */
25784 static bool
25785 ix86_nopic_noplt_attribute_p (rtx call_op)
25787 if (flag_pic || ix86_cmodel == CM_LARGE
25788 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
25789 || SYMBOL_REF_LOCAL_P (call_op))
25790 return false;
25792 tree symbol_decl = SYMBOL_REF_DECL (call_op);
25794 if (!flag_plt
25795 || (symbol_decl != NULL_TREE
25796 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
25797 return true;
25799 return false;
25802 /* Output the assembly for a call instruction. */
25804 const char *
25805 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25807 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25808 bool seh_nop_p = false;
25809 const char *xasm;
25811 if (SIBLING_CALL_P (insn))
25813 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25814 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
25815 else if (direct_p)
25816 xasm = "%!jmp\t%P0";
25817 /* SEH epilogue detection requires the indirect branch case
25818 to include REX.W. */
25819 else if (TARGET_SEH)
25820 xasm = "%!rex.W jmp %A0";
25821 else
25822 xasm = "%!jmp\t%A0";
25824 output_asm_insn (xasm, &call_op);
25825 return "";
25828 /* SEH unwinding can require an extra nop to be emitted in several
25829 circumstances. Determine if we have one of those. */
25830 if (TARGET_SEH)
25832 rtx_insn *i;
25834 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25836 /* If we get to another real insn, we don't need the nop. */
25837 if (INSN_P (i))
25838 break;
25840 /* If we get to the epilogue note, prevent a catch region from
25841 being adjacent to the standard epilogue sequence. If non-
25842 call-exceptions, we'll have done this during epilogue emission. */
25843 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25844 && !flag_non_call_exceptions
25845 && !can_throw_internal (insn))
25847 seh_nop_p = true;
25848 break;
25852 /* If we didn't find a real insn following the call, prevent the
25853 unwinder from looking into the next function. */
25854 if (i == NULL)
25855 seh_nop_p = true;
25858 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25859 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
25860 else if (direct_p)
25861 xasm = "%!call\t%P0";
25862 else
25863 xasm = "%!call\t%A0";
25865 output_asm_insn (xasm, &call_op);
25867 if (seh_nop_p)
25868 return "nop";
25870 return "";
25873 /* Clear stack slot assignments remembered from previous functions.
25874 This is called from INIT_EXPANDERS once before RTL is emitted for each
25875 function. */
25877 static struct machine_function *
25878 ix86_init_machine_status (void)
25880 struct machine_function *f;
25882 f = ggc_cleared_alloc<machine_function> ();
25883 f->use_fast_prologue_epilogue_nregs = -1;
25884 f->call_abi = ix86_abi;
25886 return f;
25889 /* Return a MEM corresponding to a stack slot with mode MODE.
25890 Allocate a new slot if necessary.
25892 The RTL for a function can have several slots available: N is
25893 which slot to use. */
25896 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25898 struct stack_local_entry *s;
25900 gcc_assert (n < MAX_386_STACK_LOCALS);
25902 for (s = ix86_stack_locals; s; s = s->next)
25903 if (s->mode == mode && s->n == n)
25904 return validize_mem (copy_rtx (s->rtl));
25906 s = ggc_alloc<stack_local_entry> ();
25907 s->n = n;
25908 s->mode = mode;
25909 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25911 s->next = ix86_stack_locals;
25912 ix86_stack_locals = s;
25913 return validize_mem (copy_rtx (s->rtl));
25916 static void
25917 ix86_instantiate_decls (void)
25919 struct stack_local_entry *s;
25921 for (s = ix86_stack_locals; s; s = s->next)
25922 if (s->rtl != NULL_RTX)
25923 instantiate_decl_rtl (s->rtl);
25926 /* Check whether x86 address PARTS is a pc-relative address. */
25928 static bool
25929 rip_relative_addr_p (struct ix86_address *parts)
25931 rtx base, index, disp;
25933 base = parts->base;
25934 index = parts->index;
25935 disp = parts->disp;
25937 if (disp && !base && !index)
25939 if (TARGET_64BIT)
25941 rtx symbol = disp;
25943 if (GET_CODE (disp) == CONST)
25944 symbol = XEXP (disp, 0);
25945 if (GET_CODE (symbol) == PLUS
25946 && CONST_INT_P (XEXP (symbol, 1)))
25947 symbol = XEXP (symbol, 0);
25949 if (GET_CODE (symbol) == LABEL_REF
25950 || (GET_CODE (symbol) == SYMBOL_REF
25951 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25952 || (GET_CODE (symbol) == UNSPEC
25953 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25954 || XINT (symbol, 1) == UNSPEC_PCREL
25955 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25956 return true;
25959 return false;
25962 /* Calculate the length of the memory address in the instruction encoding.
25963 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25964 or other prefixes. We never generate addr32 prefix for LEA insn. */
25967 memory_address_length (rtx addr, bool lea)
25969 struct ix86_address parts;
25970 rtx base, index, disp;
25971 int len;
25972 int ok;
25974 if (GET_CODE (addr) == PRE_DEC
25975 || GET_CODE (addr) == POST_INC
25976 || GET_CODE (addr) == PRE_MODIFY
25977 || GET_CODE (addr) == POST_MODIFY)
25978 return 0;
25980 ok = ix86_decompose_address (addr, &parts);
25981 gcc_assert (ok);
25983 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25985 /* If this is not LEA instruction, add the length of addr32 prefix. */
25986 if (TARGET_64BIT && !lea
25987 && (SImode_address_operand (addr, VOIDmode)
25988 || (parts.base && GET_MODE (parts.base) == SImode)
25989 || (parts.index && GET_MODE (parts.index) == SImode)))
25990 len++;
25992 base = parts.base;
25993 index = parts.index;
25994 disp = parts.disp;
25996 if (base && GET_CODE (base) == SUBREG)
25997 base = SUBREG_REG (base);
25998 if (index && GET_CODE (index) == SUBREG)
25999 index = SUBREG_REG (index);
26001 gcc_assert (base == NULL_RTX || REG_P (base));
26002 gcc_assert (index == NULL_RTX || REG_P (index));
26004 /* Rule of thumb:
26005 - esp as the base always wants an index,
26006 - ebp as the base always wants a displacement,
26007 - r12 as the base always wants an index,
26008 - r13 as the base always wants a displacement. */
26010 /* Register Indirect. */
26011 if (base && !index && !disp)
26013 /* esp (for its index) and ebp (for its displacement) need
26014 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
26015 code. */
26016 if (base == arg_pointer_rtx
26017 || base == frame_pointer_rtx
26018 || REGNO (base) == SP_REG
26019 || REGNO (base) == BP_REG
26020 || REGNO (base) == R12_REG
26021 || REGNO (base) == R13_REG)
26022 len++;
26025 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
26026 is not disp32, but disp32(%rip), so for disp32
26027 SIB byte is needed, unless print_operand_address
26028 optimizes it into disp32(%rip) or (%rip) is implied
26029 by UNSPEC. */
26030 else if (disp && !base && !index)
26032 len += 4;
26033 if (rip_relative_addr_p (&parts))
26034 len++;
26036 else
26038 /* Find the length of the displacement constant. */
26039 if (disp)
26041 if (base && satisfies_constraint_K (disp))
26042 len += 1;
26043 else
26044 len += 4;
26046 /* ebp always wants a displacement. Similarly r13. */
26047 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
26048 len++;
26050 /* An index requires the two-byte modrm form.... */
26051 if (index
26052 /* ...like esp (or r12), which always wants an index. */
26053 || base == arg_pointer_rtx
26054 || base == frame_pointer_rtx
26055 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
26056 len++;
26059 return len;
26062 /* Compute default value for "length_immediate" attribute. When SHORTFORM
26063 is set, expect that the insn has an 8-bit immediate alternative. */
26065 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
26067 int len = 0;
26068 int i;
26069 extract_insn_cached (insn);
26070 for (i = recog_data.n_operands - 1; i >= 0; --i)
26071 if (CONSTANT_P (recog_data.operand[i]))
26073 enum attr_mode mode = get_attr_mode (insn);
26075 gcc_assert (!len);
26076 if (shortform && CONST_INT_P (recog_data.operand[i]))
26078 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
26079 switch (mode)
26081 case MODE_QI:
26082 len = 1;
26083 continue;
26084 case MODE_HI:
26085 ival = trunc_int_for_mode (ival, HImode);
26086 break;
26087 case MODE_SI:
26088 ival = trunc_int_for_mode (ival, SImode);
26089 break;
26090 default:
26091 break;
26093 if (IN_RANGE (ival, -128, 127))
26095 len = 1;
26096 continue;
26099 switch (mode)
26101 case MODE_QI:
26102 len = 1;
26103 break;
26104 case MODE_HI:
26105 len = 2;
26106 break;
26107 case MODE_SI:
26108 len = 4;
26109 break;
26110 /* Immediates for DImode instructions are encoded
26111 as 32bit sign extended values. */
26112 case MODE_DI:
26113 len = 4;
26114 break;
26115 default:
26116 fatal_insn ("unknown insn mode", insn);
26119 return len;
26122 /* Compute default value for "length_address" attribute. */
26124 ix86_attr_length_address_default (rtx_insn *insn)
26126 int i;
26128 if (get_attr_type (insn) == TYPE_LEA)
26130 rtx set = PATTERN (insn), addr;
26132 if (GET_CODE (set) == PARALLEL)
26133 set = XVECEXP (set, 0, 0);
26135 gcc_assert (GET_CODE (set) == SET);
26137 addr = SET_SRC (set);
26139 return memory_address_length (addr, true);
26142 extract_insn_cached (insn);
26143 for (i = recog_data.n_operands - 1; i >= 0; --i)
26144 if (MEM_P (recog_data.operand[i]))
26146 constrain_operands_cached (insn, reload_completed);
26147 if (which_alternative != -1)
26149 const char *constraints = recog_data.constraints[i];
26150 int alt = which_alternative;
26152 while (*constraints == '=' || *constraints == '+')
26153 constraints++;
26154 while (alt-- > 0)
26155 while (*constraints++ != ',')
26157 /* Skip ignored operands. */
26158 if (*constraints == 'X')
26159 continue;
26161 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26163 return 0;
26166 /* Compute default value for "length_vex" attribute. It includes
26167 2 or 3 byte VEX prefix and 1 opcode byte. */
26170 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26171 bool has_vex_w)
26173 int i;
26175 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit uses the
26176 3-byte VEX prefix. */
26177 if (!has_0f_opcode || has_vex_w)
26178 return 3 + 1;
26180 /* We can always use 2 byte VEX prefix in 32bit. */
26181 if (!TARGET_64BIT)
26182 return 2 + 1;
26184 extract_insn_cached (insn);
26186 for (i = recog_data.n_operands - 1; i >= 0; --i)
26187 if (REG_P (recog_data.operand[i]))
26189 /* REX.W bit uses 3 byte VEX prefix. */
26190 if (GET_MODE (recog_data.operand[i]) == DImode
26191 && GENERAL_REG_P (recog_data.operand[i]))
26192 return 3 + 1;
26194 else
26196 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26197 if (MEM_P (recog_data.operand[i])
26198 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26199 return 3 + 1;
26202 return 2 + 1;
26205 /* Return the maximum number of instructions a cpu can issue. */
26207 static int
26208 ix86_issue_rate (void)
26210 switch (ix86_tune)
26212 case PROCESSOR_PENTIUM:
26213 case PROCESSOR_IAMCU:
26214 case PROCESSOR_BONNELL:
26215 case PROCESSOR_SILVERMONT:
26216 case PROCESSOR_KNL:
26217 case PROCESSOR_INTEL:
26218 case PROCESSOR_K6:
26219 case PROCESSOR_BTVER2:
26220 case PROCESSOR_PENTIUM4:
26221 case PROCESSOR_NOCONA:
26222 return 2;
26224 case PROCESSOR_PENTIUMPRO:
26225 case PROCESSOR_ATHLON:
26226 case PROCESSOR_K8:
26227 case PROCESSOR_AMDFAM10:
26228 case PROCESSOR_GENERIC:
26229 case PROCESSOR_BTVER1:
26230 return 3;
26232 case PROCESSOR_BDVER1:
26233 case PROCESSOR_BDVER2:
26234 case PROCESSOR_BDVER3:
26235 case PROCESSOR_BDVER4:
26236 case PROCESSOR_CORE2:
26237 case PROCESSOR_NEHALEM:
26238 case PROCESSOR_SANDYBRIDGE:
26239 case PROCESSOR_HASWELL:
26240 return 4;
26242 default:
26243 return 1;
26247 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26248 by DEP_INSN and nothing else set by DEP_INSN. */
26250 static bool
26251 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26253 rtx set, set2;
26255 /* Simplify the test for uninteresting insns. */
26256 if (insn_type != TYPE_SETCC
26257 && insn_type != TYPE_ICMOV
26258 && insn_type != TYPE_FCMOV
26259 && insn_type != TYPE_IBR)
26260 return false;
26262 if ((set = single_set (dep_insn)) != 0)
26264 set = SET_DEST (set);
26265 set2 = NULL_RTX;
26267 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26268 && XVECLEN (PATTERN (dep_insn), 0) == 2
26269 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26270 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26272 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26273 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26275 else
26276 return false;
26278 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26279 return false;
26281 /* This test is true if the dependent insn reads the flags but
26282 not any other potentially set register. */
26283 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26284 return false;
26286 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26287 return false;
26289 return true;
26292 /* Return true iff USE_INSN has a memory address with operands set by
26293 SET_INSN. */
26295 bool
26296 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26298 int i;
26299 extract_insn_cached (use_insn);
26300 for (i = recog_data.n_operands - 1; i >= 0; --i)
26301 if (MEM_P (recog_data.operand[i]))
26303 rtx addr = XEXP (recog_data.operand[i], 0);
26304 return modified_in_p (addr, set_insn) != 0;
26306 return false;
26309 /* Helper function for exact_store_load_dependency.
26310 Return true if addr is found in insn. */
26311 static bool
26312 exact_dependency_1 (rtx addr, rtx insn)
26314 enum rtx_code code;
26315 const char *format_ptr;
26316 int i, j;
26318 code = GET_CODE (insn);
26319 switch (code)
26321 case MEM:
26322 if (rtx_equal_p (addr, insn))
26323 return true;
26324 break;
26325 case REG:
26326 CASE_CONST_ANY:
26327 case SYMBOL_REF:
26328 case CODE_LABEL:
26329 case PC:
26330 case CC0:
26331 case EXPR_LIST:
26332 return false;
26333 default:
26334 break;
26337 format_ptr = GET_RTX_FORMAT (code);
26338 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26340 switch (*format_ptr++)
26342 case 'e':
26343 if (exact_dependency_1 (addr, XEXP (insn, i)))
26344 return true;
26345 break;
26346 case 'E':
26347 for (j = 0; j < XVECLEN (insn, i); j++)
26348 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26349 return true;
26350 break;
26353 return false;
26356 /* Return true if there exists an exact dependency between the store and the load, i.e.
26357 the same memory address is used in them. */
26358 static bool
26359 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26361 rtx set1, set2;
26363 set1 = single_set (store);
26364 if (!set1)
26365 return false;
26366 if (!MEM_P (SET_DEST (set1)))
26367 return false;
26368 set2 = single_set (load);
26369 if (!set2)
26370 return false;
26371 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26372 return true;
26373 return false;
26376 static int
26377 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26379 enum attr_type insn_type, dep_insn_type;
26380 enum attr_memory memory;
26381 rtx set, set2;
26382 int dep_insn_code_number;
26384 /* Anti and output dependencies have zero cost on all CPUs. */
26385 if (REG_NOTE_KIND (link) != 0)
26386 return 0;
26388 dep_insn_code_number = recog_memoized (dep_insn);
26390 /* If we can't recognize the insns, we can't really do anything. */
26391 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26392 return cost;
26394 insn_type = get_attr_type (insn);
26395 dep_insn_type = get_attr_type (dep_insn);
26397 switch (ix86_tune)
26399 case PROCESSOR_PENTIUM:
26400 case PROCESSOR_IAMCU:
26401 /* Address Generation Interlock adds a cycle of latency. */
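      /* For example, a sequence like
	   mov  $sym, %eax
	   mov  (%eax), %ebx
	 suffers the interlock, because the address register of the load is
	 written by the immediately preceding instruction; the code below adds
	 that extra cycle to COST for LEAs and AGI-dependent instructions.  */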
26402 if (insn_type == TYPE_LEA)
26404 rtx addr = PATTERN (insn);
26406 if (GET_CODE (addr) == PARALLEL)
26407 addr = XVECEXP (addr, 0, 0);
26409 gcc_assert (GET_CODE (addr) == SET);
26411 addr = SET_SRC (addr);
26412 if (modified_in_p (addr, dep_insn))
26413 cost += 1;
26415 else if (ix86_agi_dependent (dep_insn, insn))
26416 cost += 1;
26418 /* ??? Compares pair with jump/setcc. */
26419 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26420 cost = 0;
26422 /* Floating point stores require value to be ready one cycle earlier. */
26423 if (insn_type == TYPE_FMOV
26424 && get_attr_memory (insn) == MEMORY_STORE
26425 && !ix86_agi_dependent (dep_insn, insn))
26426 cost += 1;
26427 break;
26429 case PROCESSOR_PENTIUMPRO:
26430 /* INT->FP conversion is expensive. */
26431 if (get_attr_fp_int_src (dep_insn))
26432 cost += 5;
26434 /* There is one cycle extra latency between an FP op and a store. */
26435 if (insn_type == TYPE_FMOV
26436 && (set = single_set (dep_insn)) != NULL_RTX
26437 && (set2 = single_set (insn)) != NULL_RTX
26438 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26439 && MEM_P (SET_DEST (set2)))
26440 cost += 1;
26442 memory = get_attr_memory (insn);
26444 /* Show the ability of the reorder buffer to hide the latency of a load
26445 by executing it in parallel with the previous instruction, when the
26446 previous instruction is not needed to compute the address. */
26447 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26448 && !ix86_agi_dependent (dep_insn, insn))
26450 /* Claim moves to take one cycle, as the core can issue one load
26451 at a time and the next load can start a cycle later. */
26452 if (dep_insn_type == TYPE_IMOV
26453 || dep_insn_type == TYPE_FMOV)
26454 cost = 1;
26455 else if (cost > 1)
26456 cost--;
26458 break;
26460 case PROCESSOR_K6:
26461 /* The esp dependency is resolved before
26462 the instruction is really finished. */
26463 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26464 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26465 return 1;
26467 /* INT->FP conversion is expensive. */
26468 if (get_attr_fp_int_src (dep_insn))
26469 cost += 5;
26471 memory = get_attr_memory (insn);
26473 /* Show the ability of the reorder buffer to hide the latency of a load
26474 by executing it in parallel with the previous instruction, when the
26475 previous instruction is not needed to compute the address. */
26476 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26477 && !ix86_agi_dependent (dep_insn, insn))
26479 /* Claim moves to take one cycle, as the core can issue one load
26480 at a time and the next load can start a cycle later. */
26481 if (dep_insn_type == TYPE_IMOV
26482 || dep_insn_type == TYPE_FMOV)
26483 cost = 1;
26484 else if (cost > 2)
26485 cost -= 2;
26486 else
26487 cost = 1;
26489 break;
26491 case PROCESSOR_AMDFAM10:
26492 case PROCESSOR_BDVER1:
26493 case PROCESSOR_BDVER2:
26494 case PROCESSOR_BDVER3:
26495 case PROCESSOR_BDVER4:
26496 case PROCESSOR_BTVER1:
26497 case PROCESSOR_BTVER2:
26498 case PROCESSOR_GENERIC:
26499 /* The stack engine allows push and pop instructions to execute in parallel. */
26500 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26501 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26502 return 0;
26503 /* FALLTHRU */
26505 case PROCESSOR_ATHLON:
26506 case PROCESSOR_K8:
26507 memory = get_attr_memory (insn);
26509 /* Show the ability of the reorder buffer to hide the latency of a load
26510 by executing it in parallel with the previous instruction, when the
26511 previous instruction is not needed to compute the address. */
26512 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26513 && !ix86_agi_dependent (dep_insn, insn))
26515 enum attr_unit unit = get_attr_unit (insn);
26516 int loadcost = 3;
26518 /* Because of the difference between the length of integer and
26519 floating unit pipeline preparation stages, the memory operands
26520 for floating point are cheaper.
26522 ??? For Athlon the difference is most probably 2. */
26523 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26524 loadcost = 3;
26525 else
26526 loadcost = TARGET_ATHLON ? 2 : 0;
26528 if (cost >= loadcost)
26529 cost -= loadcost;
26530 else
26531 cost = 0;
26533 break;
26535 case PROCESSOR_CORE2:
26536 case PROCESSOR_NEHALEM:
26537 case PROCESSOR_SANDYBRIDGE:
26538 case PROCESSOR_HASWELL:
26539 /* The stack engine allows push and pop instructions to execute in parallel. */
26540 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26541 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26542 return 0;
26544 memory = get_attr_memory (insn);
26546 /* Show the ability of the reorder buffer to hide the latency of a load
26547 by executing it in parallel with the previous instruction, when the
26548 previous instruction is not needed to compute the address. */
26549 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26550 && !ix86_agi_dependent (dep_insn, insn))
26552 if (cost >= 4)
26553 cost -= 4;
26554 else
26555 cost = 0;
26557 break;
26559 case PROCESSOR_SILVERMONT:
26560 case PROCESSOR_KNL:
26561 case PROCESSOR_INTEL:
26562 if (!reload_completed)
26563 return cost;
26565 /* Increase cost of integer loads. */
26566 memory = get_attr_memory (dep_insn);
26567 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26569 enum attr_unit unit = get_attr_unit (dep_insn);
26570 if (unit == UNIT_INTEGER && cost == 1)
26572 if (memory == MEMORY_LOAD)
26573 cost = 3;
26574 else
26576 /* Increase cost of ld/st for short int types only
26577 because of the store forwarding issue. */
26578 rtx set = single_set (dep_insn);
26579 if (set && (GET_MODE (SET_DEST (set)) == QImode
26580 || GET_MODE (SET_DEST (set)) == HImode))
26582 /* Increase the cost if an exact store/load
26583 dependence exists and INSN is a load insn. */
26584 enum attr_memory insn_memory = get_attr_memory (insn);
26585 if (insn_memory == MEMORY_LOAD
26586 && exact_store_load_dependency (dep_insn, insn))
26587 cost = 3;
26593 default:
26594 break;
26597 return cost;
26600 /* How many alternative schedules to try. This should be as wide as the
26601 scheduling freedom in the DFA, but no wider. Making this value too
26602 large results in extra work for the scheduler. */
26604 static int
26605 ia32_multipass_dfa_lookahead (void)
26607 switch (ix86_tune)
26609 case PROCESSOR_PENTIUM:
26610 case PROCESSOR_IAMCU:
26611 return 2;
26613 case PROCESSOR_PENTIUMPRO:
26614 case PROCESSOR_K6:
26615 return 1;
26617 case PROCESSOR_BDVER1:
26618 case PROCESSOR_BDVER2:
26619 case PROCESSOR_BDVER3:
26620 case PROCESSOR_BDVER4:
26621 /* We use lookahead value 4 for BD both before and after reload
26622 schedules. The plan is to use value 8 for -O3. */
26623 return 4;
26625 case PROCESSOR_CORE2:
26626 case PROCESSOR_NEHALEM:
26627 case PROCESSOR_SANDYBRIDGE:
26628 case PROCESSOR_HASWELL:
26629 case PROCESSOR_BONNELL:
26630 case PROCESSOR_SILVERMONT:
26631 case PROCESSOR_KNL:
26632 case PROCESSOR_INTEL:
26633 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26634 as the number of instructions that can be executed in one cycle, i.e.,
26635 issue_rate. I wonder why tuning for many CPUs does not do this. */
26636 if (reload_completed)
26637 return ix86_issue_rate ();
26638 /* Don't use lookahead for pre-reload schedule to save compile time. */
26639 return 0;
26641 default:
26642 return 0;
26646 /* Return true if target platform supports macro-fusion. */
26648 static bool
26649 ix86_macro_fusion_p ()
26651 return TARGET_FUSE_CMP_AND_BRANCH;
26654 /* Check whether the current microarchitecture supports macro fusion
26655 for insn pair "CONDGEN + CONDJMP". Refer to
26656 "Intel Architectures Optimization Reference Manual". */
26658 static bool
26659 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26661 rtx src, dest;
26662 enum rtx_code ccode;
26663 rtx compare_set = NULL_RTX, test_if, cond;
26664 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26666 if (!any_condjump_p (condjmp))
26667 return false;
26669 if (get_attr_type (condgen) != TYPE_TEST
26670 && get_attr_type (condgen) != TYPE_ICMP
26671 && get_attr_type (condgen) != TYPE_INCDEC
26672 && get_attr_type (condgen) != TYPE_ALU)
26673 return false;
26675 compare_set = single_set (condgen);
26676 if (compare_set == NULL_RTX
26677 && !TARGET_FUSE_ALU_AND_BRANCH)
26678 return false;
26680 if (compare_set == NULL_RTX)
26682 int i;
26683 rtx pat = PATTERN (condgen);
26684 for (i = 0; i < XVECLEN (pat, 0); i++)
26685 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26687 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26688 if (GET_CODE (set_src) == COMPARE)
26689 compare_set = XVECEXP (pat, 0, i);
26690 else
26691 alu_set = XVECEXP (pat, 0, i);
26694 if (compare_set == NULL_RTX)
26695 return false;
26696 src = SET_SRC (compare_set);
26697 if (GET_CODE (src) != COMPARE)
26698 return false;
26700 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26701 supported. */
26702 if ((MEM_P (XEXP (src, 0))
26703 && CONST_INT_P (XEXP (src, 1)))
26704 || (MEM_P (XEXP (src, 1))
26705 && CONST_INT_P (XEXP (src, 0))))
26706 return false;
26708 /* No fusion for RIP-relative address. */
26709 if (MEM_P (XEXP (src, 0)))
26710 addr = XEXP (XEXP (src, 0), 0);
26711 else if (MEM_P (XEXP (src, 1)))
26712 addr = XEXP (XEXP (src, 1), 0);
26714 if (addr) {
26715 ix86_address parts;
26716 int ok = ix86_decompose_address (addr, &parts);
26717 gcc_assert (ok);
26719 if (rip_relative_addr_p (&parts))
26720 return false;
26723 test_if = SET_SRC (pc_set (condjmp));
26724 cond = XEXP (test_if, 0);
26725 ccode = GET_CODE (cond);
26726 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26727 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26728 && (ccode == GE
26729 || ccode == GT
26730 || ccode == LE
26731 || ccode == LT))
26732 return false;
26734 /* Return true for TYPE_TEST and TYPE_ICMP. */
26735 if (get_attr_type (condgen) == TYPE_TEST
26736 || get_attr_type (condgen) == TYPE_ICMP)
26737 return true;
26739 /* The following handles the case of macro-fusion for alu + jmp. */
26740 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26741 return false;
26743 /* No fusion for alu op with memory destination operand. */
26744 dest = SET_DEST (alu_set);
26745 if (MEM_P (dest))
26746 return false;
26748 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26749 supported. */
26750 if (get_attr_type (condgen) == TYPE_INCDEC
26751 && (ccode == GEU
26752 || ccode == GTU
26753 || ccode == LEU
26754 || ccode == LTU))
26755 return false;
26757 return true;
26760 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26761 execution. It is applied if
26762 (1) an IMUL instruction is on the top of the ready list;
26763 (2) the ready list contains the sole producer of an independent
26764 IMUL instruction.
26765 Return index of IMUL producer if it was found and -1 otherwise. */
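/* For example, if an IMUL is on top of the ready list and another entry B is
   the sole producer feeding a second, independent IMUL, the index of B is
   returned; ix86_sched_reorder then moves B to the top so that the second
   IMUL becomes ready sooner and the pipelined multiplier stays busy.  */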
26766 static int
26767 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26769 rtx_insn *insn;
26770 rtx set, insn1, insn2;
26771 sd_iterator_def sd_it;
26772 dep_t dep;
26773 int index = -1;
26774 int i;
26776 if (!TARGET_BONNELL)
26777 return index;
26779 /* Check that IMUL instruction is on the top of ready list. */
26780 insn = ready[n_ready - 1];
26781 set = single_set (insn);
26782 if (!set)
26783 return index;
26784 if (!(GET_CODE (SET_SRC (set)) == MULT
26785 && GET_MODE (SET_SRC (set)) == SImode))
26786 return index;
26788 /* Search for producer of independent IMUL instruction. */
26789 for (i = n_ready - 2; i >= 0; i--)
26791 insn = ready[i];
26792 if (!NONDEBUG_INSN_P (insn))
26793 continue;
26794 /* Skip IMUL instruction. */
26795 insn2 = PATTERN (insn);
26796 if (GET_CODE (insn2) == PARALLEL)
26797 insn2 = XVECEXP (insn2, 0, 0);
26798 if (GET_CODE (insn2) == SET
26799 && GET_CODE (SET_SRC (insn2)) == MULT
26800 && GET_MODE (SET_SRC (insn2)) == SImode)
26801 continue;
26803 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26805 rtx con;
26806 con = DEP_CON (dep);
26807 if (!NONDEBUG_INSN_P (con))
26808 continue;
26809 insn1 = PATTERN (con);
26810 if (GET_CODE (insn1) == PARALLEL)
26811 insn1 = XVECEXP (insn1, 0, 0);
26813 if (GET_CODE (insn1) == SET
26814 && GET_CODE (SET_SRC (insn1)) == MULT
26815 && GET_MODE (SET_SRC (insn1)) == SImode)
26817 sd_iterator_def sd_it1;
26818 dep_t dep1;
26819 /* Check that the IMUL has no producer other than INSN. */
26820 index = i;
26821 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26823 rtx pro;
26824 pro = DEP_PRO (dep1);
26825 if (!NONDEBUG_INSN_P (pro))
26826 continue;
26827 if (pro != insn)
26828 index = -1;
26830 if (index >= 0)
26831 break;
26834 if (index >= 0)
26835 break;
26837 return index;
26840 /* Try to find the best candidate on the top of ready list if two insns
26841 have the same priority - candidate is best if its dependees were
26842 scheduled earlier. Applied for Silvermont only.
26843 Return true if top 2 insns must be interchanged. */
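/* The tie-break below works as follows: if the two candidates have equal
   priority, compare the largest INSN_TICK over their resolved backward
   dependencies; the candidate whose producers were scheduled earlier wins.
   If those ticks are equal as well, the load is preferred.  */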
26844 static bool
26845 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26847 rtx_insn *top = ready[n_ready - 1];
26848 rtx_insn *next = ready[n_ready - 2];
26849 rtx set;
26850 sd_iterator_def sd_it;
26851 dep_t dep;
26852 int clock1 = -1;
26853 int clock2 = -1;
26854 #define INSN_TICK(INSN) (HID (INSN)->tick)
26856 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26857 return false;
26859 if (!NONDEBUG_INSN_P (top))
26860 return false;
26861 if (!NONJUMP_INSN_P (top))
26862 return false;
26863 if (!NONDEBUG_INSN_P (next))
26864 return false;
26865 if (!NONJUMP_INSN_P (next))
26866 return false;
26867 set = single_set (top);
26868 if (!set)
26869 return false;
26870 set = single_set (next);
26871 if (!set)
26872 return false;
26874 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26876 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26877 return false;
26878 /* Determine the winner more precisely. */
26879 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26881 rtx pro;
26882 pro = DEP_PRO (dep);
26883 if (!NONDEBUG_INSN_P (pro))
26884 continue;
26885 if (INSN_TICK (pro) > clock1)
26886 clock1 = INSN_TICK (pro);
26888 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26890 rtx pro;
26891 pro = DEP_PRO (dep);
26892 if (!NONDEBUG_INSN_P (pro))
26893 continue;
26894 if (INSN_TICK (pro) > clock2)
26895 clock2 = INSN_TICK (pro);
26898 if (clock1 == clock2)
26900 /* Determine winner - load must win. */
26901 enum attr_memory memory1, memory2;
26902 memory1 = get_attr_memory (top);
26903 memory2 = get_attr_memory (next);
26904 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26905 return true;
26907 return (bool) (clock2 < clock1);
26909 return false;
26910 #undef INSN_TICK
26913 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26914 Return issue rate. */
26915 static int
26916 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26917 int *pn_ready, int clock_var)
26919 int issue_rate = -1;
26920 int n_ready = *pn_ready;
26921 int i;
26922 rtx_insn *insn;
26923 int index = -1;
26925 /* Set up issue rate. */
26926 issue_rate = ix86_issue_rate ();
26928 /* Do reordering for BONNELL/SILVERMONT only. */
26929 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26930 return issue_rate;
26932 /* Nothing to do if ready list contains only 1 instruction. */
26933 if (n_ready <= 1)
26934 return issue_rate;
26936 /* Do reordering for the post-reload scheduler only. */
26937 if (!reload_completed)
26938 return issue_rate;
26940 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26942 if (sched_verbose > 1)
26943 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26944 INSN_UID (ready[index]));
26946 /* Put IMUL producer (ready[index]) at the top of ready list. */
26947 insn = ready[index];
26948 for (i = index; i < n_ready - 1; i++)
26949 ready[i] = ready[i + 1];
26950 ready[n_ready - 1] = insn;
26951 return issue_rate;
26954 /* Skip selective scheduling since HID is not populated in it. */
26955 if (clock_var != 0
26956 && !sel_sched_p ()
26957 && swap_top_of_ready_list (ready, n_ready))
26959 if (sched_verbose > 1)
26960 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26961 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26962 /* Swap 2 top elements of ready list. */
26963 insn = ready[n_ready - 1];
26964 ready[n_ready - 1] = ready[n_ready - 2];
26965 ready[n_ready - 2] = insn;
26967 return issue_rate;
26970 static bool
26971 ix86_class_likely_spilled_p (reg_class_t);
26973 /* Return true if the lhs of INSN is a HW function argument register; set
26974 *IS_SPILLED to true if it is a likely spilled HW register. */
26975 static bool
26976 insn_is_function_arg (rtx insn, bool* is_spilled)
26978 rtx dst;
26980 if (!NONDEBUG_INSN_P (insn))
26981 return false;
26982 /* Call instructions are not movable; ignore them. */
26983 if (CALL_P (insn))
26984 return false;
26985 insn = PATTERN (insn);
26986 if (GET_CODE (insn) == PARALLEL)
26987 insn = XVECEXP (insn, 0, 0);
26988 if (GET_CODE (insn) != SET)
26989 return false;
26990 dst = SET_DEST (insn);
26991 if (REG_P (dst) && HARD_REGISTER_P (dst)
26992 && ix86_function_arg_regno_p (REGNO (dst)))
26994 /* Is it likely spilled HW register? */
26995 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26996 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26997 *is_spilled = true;
26998 return true;
27000 return false;
27003 /* Add output dependencies for a chain of adjacent function arguments, but
27004 only if there is a move to a likely spilled HW register. Return the first
27005 argument if at least one dependence was added, or NULL otherwise. */
27006 static rtx_insn *
27007 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
27009 rtx_insn *insn;
27010 rtx_insn *last = call;
27011 rtx_insn *first_arg = NULL;
27012 bool is_spilled = false;
27014 head = PREV_INSN (head);
27016 /* Find the argument-passing instruction nearest to the call. */
27017 while (true)
27019 last = PREV_INSN (last);
27020 if (last == head)
27021 return NULL;
27022 if (!NONDEBUG_INSN_P (last))
27023 continue;
27024 if (insn_is_function_arg (last, &is_spilled))
27025 break;
27026 return NULL;
27029 first_arg = last;
27030 while (true)
27032 insn = PREV_INSN (last);
27033 if (!INSN_P (insn))
27034 break;
27035 if (insn == head)
27036 break;
27037 if (!NONDEBUG_INSN_P (insn))
27039 last = insn;
27040 continue;
27042 if (insn_is_function_arg (insn, &is_spilled))
27044 /* Add an output dependence between two function arguments if the chain
27045 of output arguments contains likely spilled HW registers. */
27046 if (is_spilled)
27047 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27048 first_arg = last = insn;
27050 else
27051 break;
27053 if (!is_spilled)
27054 return NULL;
27055 return first_arg;
27058 /* Add output or anti dependency from insn to first_arg to restrict its code
27059 motion. */
27060 static void
27061 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
27063 rtx set;
27064 rtx tmp;
27066 /* Add anti dependencies for bounds stores. */
27067 if (INSN_P (insn)
27068 && GET_CODE (PATTERN (insn)) == PARALLEL
27069 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
27070 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
27072 add_dependence (first_arg, insn, REG_DEP_ANTI);
27073 return;
27076 set = single_set (insn);
27077 if (!set)
27078 return;
27079 tmp = SET_DEST (set);
27080 if (REG_P (tmp))
27082 /* Add output dependency to the first function argument. */
27083 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27084 return;
27086 /* Add anti dependency. */
27087 add_dependence (first_arg, insn, REG_DEP_ANTI);
27090 /* Avoid cross-block motion of a function argument by adding a dependency
27091 from the first non-jump instruction in BB. */
27092 static void
27093 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
27095 rtx_insn *insn = BB_END (bb);
27097 while (insn)
27099 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
27101 rtx set = single_set (insn);
27102 if (set)
27104 avoid_func_arg_motion (arg, insn);
27105 return;
27108 if (insn == BB_HEAD (bb))
27109 return;
27110 insn = PREV_INSN (insn);
27114 /* Hook for pre-reload schedule - avoid motion of function arguments
27115 passed in likely spilled HW registers. */
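/* For example, for a call set up as
     mov  $1, %eax
     mov  $2, %edx
     mov  $3, %ecx
     call foo
   the argument moves are chained with output dependencies, and extra
   dependencies keep them from being hoisted over earlier instructions or
   into predecessor blocks, so the live ranges of these likely-spilled hard
   registers are not stretched by the pre-reload scheduler.  */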
27116 static void
27117 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
27119 rtx_insn *insn;
27120 rtx_insn *first_arg = NULL;
27121 if (reload_completed)
27122 return;
27123 while (head != tail && DEBUG_INSN_P (head))
27124 head = NEXT_INSN (head);
27125 for (insn = tail; insn != head; insn = PREV_INSN (insn))
27126 if (INSN_P (insn) && CALL_P (insn))
27128 first_arg = add_parameter_dependencies (insn, head);
27129 if (first_arg)
27131 /* Add a dependee for the first argument to predecessors, but only
27132 if the region contains more than one block. */
27133 basic_block bb = BLOCK_FOR_INSN (insn);
27134 int rgn = CONTAINING_RGN (bb->index);
27135 int nr_blks = RGN_NR_BLOCKS (rgn);
27136 /* Skip trivial regions and region head blocks that can have
27137 predecessors outside of region. */
27138 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27140 edge e;
27141 edge_iterator ei;
27143 /* Regions are SCCs with the exception of selective
27144 scheduling with pipelining of outer blocks enabled.
27145 So also check that immediate predecessors of a non-head
27146 block are in the same region. */
27147 FOR_EACH_EDGE (e, ei, bb->preds)
27149 /* Avoid creating loop-carried dependencies by using
27150 the topological ordering in the region. */
27151 if (rgn == CONTAINING_RGN (e->src->index)
27152 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27153 add_dependee_for_func_arg (first_arg, e->src);
27156 insn = first_arg;
27157 if (insn == head)
27158 break;
27161 else if (first_arg)
27162 avoid_func_arg_motion (first_arg, insn);
27165 /* Hook for pre-reload schedule - set priority of moves from likely spilled
27166 HW registers to maximum, to schedule them as soon as possible. These are
27167 moves from function argument registers at the top of the function entry
27168 and moves from function return value registers after call. */
27169 static int
27170 ix86_adjust_priority (rtx_insn *insn, int priority)
27172 rtx set;
27174 if (reload_completed)
27175 return priority;
27177 if (!NONDEBUG_INSN_P (insn))
27178 return priority;
27180 set = single_set (insn);
27181 if (set)
27183 rtx tmp = SET_SRC (set);
27184 if (REG_P (tmp)
27185 && HARD_REGISTER_P (tmp)
27186 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27187 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27188 return current_sched_info->sched_max_insns_priority;
27191 return priority;
27194 /* Model decoder of Core 2/i7.
27195 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
27196 track the instruction fetch block boundaries and make sure that long
27197 (9+ bytes) instructions are assigned to D0. */
27199 /* Maximum length of an insn that can be handled by
27200 a secondary decoder unit. '8' for Core 2/i7. */
27201 static int core2i7_secondary_decoder_max_insn_size;
27203 /* Ifetch block size, i.e., the number of bytes the decoder reads per cycle.
27204 '16' for Core 2/i7. */
27205 static int core2i7_ifetch_block_size;
27207 /* Maximum number of instructions the decoder can handle per cycle.
27208 '6' for Core 2/i7. */
27209 static int core2i7_ifetch_block_max_insns;
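/* With these parameters, for instance, a 9-byte instruction can only be the
   first instruction issued in a cycle (it is too long for a secondary
   decoder), and once the 16-byte fetch window or the 6-instruction limit is
   reached, the remaining ready instructions are masked out for the current
   cycle (see core2i7_first_cycle_multipass_filter_ready_try).  */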
27211 typedef struct ix86_first_cycle_multipass_data_ *
27212 ix86_first_cycle_multipass_data_t;
27213 typedef const struct ix86_first_cycle_multipass_data_ *
27214 const_ix86_first_cycle_multipass_data_t;
27216 /* A variable to store target state across calls to max_issue within
27217 one cycle. */
27218 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27219 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27221 /* Initialize DATA. */
27222 static void
27223 core2i7_first_cycle_multipass_init (void *_data)
27225 ix86_first_cycle_multipass_data_t data
27226 = (ix86_first_cycle_multipass_data_t) _data;
27228 data->ifetch_block_len = 0;
27229 data->ifetch_block_n_insns = 0;
27230 data->ready_try_change = NULL;
27231 data->ready_try_change_size = 0;
27234 /* Advancing the cycle; reset ifetch block counts. */
27235 static void
27236 core2i7_dfa_post_advance_cycle (void)
27238 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27240 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27242 data->ifetch_block_len = 0;
27243 data->ifetch_block_n_insns = 0;
27246 static int min_insn_size (rtx_insn *);
27248 /* Filter out insns from ready_try that the core will not be able to issue
27249 on the current cycle due to decoder restrictions. */
27250 static void
27251 core2i7_first_cycle_multipass_filter_ready_try
27252 (const_ix86_first_cycle_multipass_data_t data,
27253 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27255 while (n_ready--)
27257 rtx_insn *insn;
27258 int insn_size;
27260 if (ready_try[n_ready])
27261 continue;
27263 insn = get_ready_element (n_ready);
27264 insn_size = min_insn_size (insn);
27266 if (/* If this is too long an insn for a secondary decoder ... */
27267 (!first_cycle_insn_p
27268 && insn_size > core2i7_secondary_decoder_max_insn_size)
27269 /* ... or it would not fit into the ifetch block ... */
27270 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27271 /* ... or the decoder is full already ... */
27272 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27273 /* ... mask the insn out. */
27275 ready_try[n_ready] = 1;
27277 if (data->ready_try_change)
27278 bitmap_set_bit (data->ready_try_change, n_ready);
27283 /* Prepare for a new round of multipass lookahead scheduling. */
27284 static void
27285 core2i7_first_cycle_multipass_begin (void *_data,
27286 signed char *ready_try, int n_ready,
27287 bool first_cycle_insn_p)
27289 ix86_first_cycle_multipass_data_t data
27290 = (ix86_first_cycle_multipass_data_t) _data;
27291 const_ix86_first_cycle_multipass_data_t prev_data
27292 = ix86_first_cycle_multipass_data;
27294 /* Restore the state from the end of the previous round. */
27295 data->ifetch_block_len = prev_data->ifetch_block_len;
27296 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27298 /* Filter instructions that cannot be issued on current cycle due to
27299 decoder restrictions. */
27300 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27301 first_cycle_insn_p);
27304 /* INSN is being issued in current solution. Account for its impact on
27305 the decoder model. */
27306 static void
27307 core2i7_first_cycle_multipass_issue (void *_data,
27308 signed char *ready_try, int n_ready,
27309 rtx_insn *insn, const void *_prev_data)
27311 ix86_first_cycle_multipass_data_t data
27312 = (ix86_first_cycle_multipass_data_t) _data;
27313 const_ix86_first_cycle_multipass_data_t prev_data
27314 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27316 int insn_size = min_insn_size (insn);
27318 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27319 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27320 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27321 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27323 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27324 if (!data->ready_try_change)
27326 data->ready_try_change = sbitmap_alloc (n_ready);
27327 data->ready_try_change_size = n_ready;
27329 else if (data->ready_try_change_size < n_ready)
27331 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27332 n_ready, 0);
27333 data->ready_try_change_size = n_ready;
27335 bitmap_clear (data->ready_try_change);
27337 /* Filter out insns from ready_try that the core will not be able to issue
27338 on the current cycle due to decoder restrictions. */
27339 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27340 false);
27343 /* Revert the effect on ready_try. */
27344 static void
27345 core2i7_first_cycle_multipass_backtrack (const void *_data,
27346 signed char *ready_try,
27347 int n_ready ATTRIBUTE_UNUSED)
27349 const_ix86_first_cycle_multipass_data_t data
27350 = (const_ix86_first_cycle_multipass_data_t) _data;
27351 unsigned int i = 0;
27352 sbitmap_iterator sbi;
27354 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27355 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27357 ready_try[i] = 0;
27361 /* Save the result of multipass lookahead scheduling for the next round. */
27362 static void
27363 core2i7_first_cycle_multipass_end (const void *_data)
27365 const_ix86_first_cycle_multipass_data_t data
27366 = (const_ix86_first_cycle_multipass_data_t) _data;
27367 ix86_first_cycle_multipass_data_t next_data
27368 = ix86_first_cycle_multipass_data;
27370 if (data != NULL)
27372 next_data->ifetch_block_len = data->ifetch_block_len;
27373 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27377 /* Deallocate target data. */
27378 static void
27379 core2i7_first_cycle_multipass_fini (void *_data)
27381 ix86_first_cycle_multipass_data_t data
27382 = (ix86_first_cycle_multipass_data_t) _data;
27384 if (data->ready_try_change)
27386 sbitmap_free (data->ready_try_change);
27387 data->ready_try_change = NULL;
27388 data->ready_try_change_size = 0;
27392 /* Prepare for scheduling pass. */
27393 static void
27394 ix86_sched_init_global (FILE *, int, int)
27396 /* Install scheduling hooks for current CPU. Some of these hooks are used
27397 in time-critical parts of the scheduler, so we only set them up when
27398 they are actually used. */
27399 switch (ix86_tune)
27401 case PROCESSOR_CORE2:
27402 case PROCESSOR_NEHALEM:
27403 case PROCESSOR_SANDYBRIDGE:
27404 case PROCESSOR_HASWELL:
27405 /* Do not perform multipass scheduling for pre-reload schedule
27406 to save compile time. */
27407 if (reload_completed)
27409 targetm.sched.dfa_post_advance_cycle
27410 = core2i7_dfa_post_advance_cycle;
27411 targetm.sched.first_cycle_multipass_init
27412 = core2i7_first_cycle_multipass_init;
27413 targetm.sched.first_cycle_multipass_begin
27414 = core2i7_first_cycle_multipass_begin;
27415 targetm.sched.first_cycle_multipass_issue
27416 = core2i7_first_cycle_multipass_issue;
27417 targetm.sched.first_cycle_multipass_backtrack
27418 = core2i7_first_cycle_multipass_backtrack;
27419 targetm.sched.first_cycle_multipass_end
27420 = core2i7_first_cycle_multipass_end;
27421 targetm.sched.first_cycle_multipass_fini
27422 = core2i7_first_cycle_multipass_fini;
27424 /* Set decoder parameters. */
27425 core2i7_secondary_decoder_max_insn_size = 8;
27426 core2i7_ifetch_block_size = 16;
27427 core2i7_ifetch_block_max_insns = 6;
27428 break;
27430 /* ... Fall through ... */
27431 default:
27432 targetm.sched.dfa_post_advance_cycle = NULL;
27433 targetm.sched.first_cycle_multipass_init = NULL;
27434 targetm.sched.first_cycle_multipass_begin = NULL;
27435 targetm.sched.first_cycle_multipass_issue = NULL;
27436 targetm.sched.first_cycle_multipass_backtrack = NULL;
27437 targetm.sched.first_cycle_multipass_end = NULL;
27438 targetm.sched.first_cycle_multipass_fini = NULL;
27439 break;
27444 /* Compute the alignment given to a constant that is being placed in memory.
27445 EXP is the constant and ALIGN is the alignment that the object would
27446 ordinarily have.
27447 The value of this function is used instead of that alignment to align
27448 the object. */
27451 ix86_constant_alignment (tree exp, int align)
27453 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27454 || TREE_CODE (exp) == INTEGER_CST)
27456 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27457 return 64;
27458 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27459 return 128;
27461 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27462 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27463 return BITS_PER_WORD;
27465 return align;
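/* For instance, a DFmode constant placed in memory is given 64-bit alignment,
   a 128-bit vector constant 128-bit alignment, and a long string constant
   (TREE_STRING_LENGTH of at least 31) word alignment when not optimizing
   for size.  */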
27468 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
27469 the data type, and ALIGN is the alignment that the object would
27470 ordinarily have. */
27472 static int
27473 iamcu_alignment (tree type, int align)
27475 enum machine_mode mode;
27477 if (align < 32 || TYPE_USER_ALIGN (type))
27478 return align;
27480 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
27481 bytes. */
27482 mode = TYPE_MODE (strip_array_types (type));
27483 switch (GET_MODE_CLASS (mode))
27485 case MODE_INT:
27486 case MODE_COMPLEX_INT:
27487 case MODE_COMPLEX_FLOAT:
27488 case MODE_FLOAT:
27489 case MODE_DECIMAL_FLOAT:
27490 return 32;
27491 default:
27492 return align;
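/* Under the IA MCU psABI this caps the alignment of scalar data: a double or
   long long that would ordinarily be 8-byte aligned is given only 32-bit
   (4-byte) alignment here, unless the user requested a larger alignment
   explicitly.  */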
27496 /* Compute the alignment for a static variable.
27497 TYPE is the data type, and ALIGN is the alignment that
27498 the object would ordinarily have. The value of this function is used
27499 instead of that alignment to align the object. */
27502 ix86_data_alignment (tree type, int align, bool opt)
27504 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27505 for symbols from other compilation units or symbols that don't need
27506 to bind locally. In order to preserve some ABI compatibility with
27507 those compilers, ensure we don't decrease alignment from what we
27508 used to assume. */
27510 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27512 /* A data structure equal to or greater than the size of a cache line
27513 (64 bytes in the Pentium 4 and other recent Intel processors, including
27514 processors based on the Intel Core microarchitecture) should be aligned
27515 so that its base address is a multiple of the cache line size. */
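/* With the usual 64-byte prefetch block this makes max_align 512 bits, so,
   when alignment optimization is allowed, a static aggregate of 64 bytes or
   more ends up cache-line aligned, while the compatibility value above keeps
   aggregates of 32 bytes (256 bits) or more at the alignment GCC 4.8 used to
   assume.  */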
27517 int max_align
27518 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27520 if (max_align < BITS_PER_WORD)
27521 max_align = BITS_PER_WORD;
27523 switch (ix86_align_data_type)
27525 case ix86_align_data_type_abi: opt = false; break;
27526 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27527 case ix86_align_data_type_cacheline: break;
27530 if (TARGET_IAMCU)
27531 align = iamcu_alignment (type, align);
27533 if (opt
27534 && AGGREGATE_TYPE_P (type)
27535 && TYPE_SIZE (type)
27536 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27538 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27539 && align < max_align_compat)
27540 align = max_align_compat;
27541 if (wi::geu_p (TYPE_SIZE (type), max_align)
27542 && align < max_align)
27543 align = max_align;
27546 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27547 to a 16-byte boundary. */
27548 if (TARGET_64BIT)
27550 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27551 && TYPE_SIZE (type)
27552 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27553 && wi::geu_p (TYPE_SIZE (type), 128)
27554 && align < 128)
27555 return 128;
27558 if (!opt)
27559 return align;
27561 if (TREE_CODE (type) == ARRAY_TYPE)
27563 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27564 return 64;
27565 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27566 return 128;
27568 else if (TREE_CODE (type) == COMPLEX_TYPE)
27571 if (TYPE_MODE (type) == DCmode && align < 64)
27572 return 64;
27573 if ((TYPE_MODE (type) == XCmode
27574 || TYPE_MODE (type) == TCmode) && align < 128)
27575 return 128;
27577 else if ((TREE_CODE (type) == RECORD_TYPE
27578 || TREE_CODE (type) == UNION_TYPE
27579 || TREE_CODE (type) == QUAL_UNION_TYPE)
27580 && TYPE_FIELDS (type))
27582 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27583 return 64;
27584 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27585 return 128;
27587 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27588 || TREE_CODE (type) == INTEGER_TYPE)
27590 if (TYPE_MODE (type) == DFmode && align < 64)
27591 return 64;
27592 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27593 return 128;
27596 return align;
27599 /* Compute the alignment for a local variable or a stack slot. EXP is
27600 the data type or decl itself, MODE is the widest mode available and
27601 ALIGN is the alignment that the object would ordinarily have. The
27602 value of this macro is used instead of that alignment to align the
27603 object. */
27605 unsigned int
27606 ix86_local_alignment (tree exp, machine_mode mode,
27607 unsigned int align)
27609 tree type, decl;
27611 if (exp && DECL_P (exp))
27613 type = TREE_TYPE (exp);
27614 decl = exp;
27616 else
27618 type = exp;
27619 decl = NULL;
27622 /* Don't do dynamic stack realignment for long long objects with
27623 -mpreferred-stack-boundary=2. */
27624 if (!TARGET_64BIT
27625 && align == 64
27626 && ix86_preferred_stack_boundary < 64
27627 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27628 && (!type || !TYPE_USER_ALIGN (type))
27629 && (!decl || !DECL_USER_ALIGN (decl)))
27630 align = 32;
27632 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27633 register in MODE. We will return the largest alignment of XF
27634 and DF. */
27635 if (!type)
27637 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27638 align = GET_MODE_ALIGNMENT (DFmode);
27639 return align;
27642 /* Don't increase alignment for Intel MCU psABI. */
27643 if (TARGET_IAMCU)
27644 return align;
27646 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27647 to a 16-byte boundary. The exact wording is:
27649 An array uses the same alignment as its elements, except that a local or
27650 global array variable of length at least 16 bytes or
27651 a C99 variable-length array variable always has alignment of at least 16 bytes.
27653 This was added to allow use of aligned SSE instructions on arrays. The
27654 rule is meant for static storage (where the compiler cannot do the analysis
27655 by itself). We follow it for automatic variables only when convenient.
27656 We fully control everything in the function being compiled, and functions
27657 from other units cannot rely on the alignment.
27659 Exclude the va_list type. It is the common case of a local array where
27660 we cannot benefit from the alignment.
27662 TODO: Probably one should optimize for size only when var is not escaping. */
27663 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27664 && TARGET_SSE)
27666 if (AGGREGATE_TYPE_P (type)
27667 && (va_list_type_node == NULL_TREE
27668 || (TYPE_MAIN_VARIANT (type)
27669 != TYPE_MAIN_VARIANT (va_list_type_node)))
27670 && TYPE_SIZE (type)
27671 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27672 && wi::geu_p (TYPE_SIZE (type), 16)
27673 && align < 128)
27674 return 128;
27676 if (TREE_CODE (type) == ARRAY_TYPE)
27678 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27679 return 64;
27680 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27681 return 128;
27683 else if (TREE_CODE (type) == COMPLEX_TYPE)
27685 if (TYPE_MODE (type) == DCmode && align < 64)
27686 return 64;
27687 if ((TYPE_MODE (type) == XCmode
27688 || TYPE_MODE (type) == TCmode) && align < 128)
27689 return 128;
27691 else if ((TREE_CODE (type) == RECORD_TYPE
27692 || TREE_CODE (type) == UNION_TYPE
27693 || TREE_CODE (type) == QUAL_UNION_TYPE)
27694 && TYPE_FIELDS (type))
27696 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27697 return 64;
27698 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27699 return 128;
27701 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27702 || TREE_CODE (type) == INTEGER_TYPE)
27705 if (TYPE_MODE (type) == DFmode && align < 64)
27706 return 64;
27707 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27708 return 128;
27710 return align;
27713 /* Compute the minimum required alignment for dynamic stack realignment
27714 purposes for a local variable, parameter or a stack slot. EXP is
27715 the data type or decl itself, MODE is its mode and ALIGN is the
27716 alignment that the object would ordinarily have. */
27718 unsigned int
27719 ix86_minimum_alignment (tree exp, machine_mode mode,
27720 unsigned int align)
27722 tree type, decl;
27724 if (exp && DECL_P (exp))
27726 type = TREE_TYPE (exp);
27727 decl = exp;
27729 else
27731 type = exp;
27732 decl = NULL;
27735 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27736 return align;
27738 /* Don't do dynamic stack realignment for long long objects with
27739 -mpreferred-stack-boundary=2. */
27740 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27741 && (!type || !TYPE_USER_ALIGN (type))
27742 && (!decl || !DECL_USER_ALIGN (decl)))
27743 return 32;
27745 return align;
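/* For example, a local long long in 32-bit code compiled with
   -mpreferred-stack-boundary=2 only requires 32-bit alignment here (unless
   the user asked for more), so it does not by itself trigger dynamic stack
   realignment.  */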
27748 /* Find a location for the static chain incoming to a nested function.
27749 This is a register, unless all free registers are used by arguments. */
27751 static rtx
27752 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27754 unsigned regno;
27756 /* While this function won't be called by the middle-end when a static
27757 chain isn't needed, it's also used throughout the backend so it's
27758 easiest to keep this check centralized. */
27759 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27760 return NULL;
27762 if (TARGET_64BIT)
27764 /* We always use R10 in 64-bit mode. */
27765 regno = R10_REG;
27767 else
27769 const_tree fntype, fndecl;
27770 unsigned int ccvt;
27772 /* By default in 32-bit mode we use ECX to pass the static chain. */
27773 regno = CX_REG;
27775 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27777 fntype = TREE_TYPE (fndecl_or_type);
27778 fndecl = fndecl_or_type;
27780 else
27782 fntype = fndecl_or_type;
27783 fndecl = NULL;
27786 ccvt = ix86_get_callcvt (fntype);
27787 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27789 /* Fastcall functions use ecx/edx for arguments, which leaves
27790 us with EAX for the static chain.
27791 Thiscall functions use ecx for arguments, which also
27792 leaves us with EAX for the static chain. */
27793 regno = AX_REG;
27795 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27797 /* Thiscall functions use ecx for arguments, which leaves
27798 us with EAX and EDX for the static chain.
27799 For ABI compatibility we use EAX. */
27800 regno = AX_REG;
27802 else if (ix86_function_regparm (fntype, fndecl) == 3)
27804 /* For regparm 3, we have no free call-clobbered registers in
27805 which to store the static chain. In order to implement this,
27806 we have the trampoline push the static chain to the stack.
27807 However, we can't push a value below the return address when
27808 we call the nested function directly, so we have to use an
27809 alternate entry point. For this we use ESI, and have the
27810 alternate entry point push ESI, so that things appear the
27811 same once we're executing the nested function. */
27812 if (incoming_p)
27814 if (fndecl == current_function_decl)
27815 ix86_static_chain_on_stack = true;
27816 return gen_frame_mem (SImode,
27817 plus_constant (Pmode,
27818 arg_pointer_rtx, -8));
27820 regno = SI_REG;
27824 return gen_rtx_REG (Pmode, regno);
27827 /* Emit RTL insns to initialize the variable parts of a trampoline.
27828 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27829 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27830 to be passed to the target function. */
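/* The generated trampoline looks roughly like this (byte values as emitted
   below, immediates filled in at run time):

   64-bit:   41 bb <imm32>   movl  $fnaddr, %r11d  (or 49 bb <imm64>, movabs)
	     41 ba <imm32>   movl  $chain, %r10d   (or 49 ba <imm64>, movabs)
	     49 ff e3        jmp   *%r11
	     90              nop   (pads the last write to a full 32-bit store)

   32-bit:   b8/b9 <imm32>   movl  $chain, %eax/%ecx  (or 68 <imm32>, pushl)
	     e9 <rel32>      jmp   fnaddr  */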
27832 static void
27833 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27835 rtx mem, fnaddr;
27836 int opcode;
27837 int offset = 0;
27839 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27841 if (TARGET_64BIT)
27843 int size;
27845 /* Load the function address into r11. Try to load the address using
27846 the shorter movl instead of movabs. We may want to support
27847 movq for kernel mode, but the kernel does not use trampolines at
27848 the moment. FNADDR is a 32-bit address and may not be in
27849 DImode when ptr_mode == SImode. Always use movl in this
27850 case. */
27851 if (ptr_mode == SImode
27852 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27854 fnaddr = copy_addr_to_reg (fnaddr);
27856 mem = adjust_address (m_tramp, HImode, offset);
27857 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27859 mem = adjust_address (m_tramp, SImode, offset + 2);
27860 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27861 offset += 6;
27863 else
27865 mem = adjust_address (m_tramp, HImode, offset);
27866 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27868 mem = adjust_address (m_tramp, DImode, offset + 2);
27869 emit_move_insn (mem, fnaddr);
27870 offset += 10;
27873 /* Load static chain using movabs to r10. Use the shorter movl
27874 instead of movabs when ptr_mode == SImode. */
27875 if (ptr_mode == SImode)
27877 opcode = 0xba41;
27878 size = 6;
27880 else
27882 opcode = 0xba49;
27883 size = 10;
27886 mem = adjust_address (m_tramp, HImode, offset);
27887 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27889 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27890 emit_move_insn (mem, chain_value);
27891 offset += size;
27893 /* Jump to r11; the last (unused) byte is a nop, only there to
27894 pad the write out to a single 32-bit store. */
27895 mem = adjust_address (m_tramp, SImode, offset);
27896 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27897 offset += 4;
27899 else
27901 rtx disp, chain;
27903 /* Depending on the static chain location, either load a register
27904 with a constant, or push the constant to the stack. All of the
27905 instructions are the same size. */
27906 chain = ix86_static_chain (fndecl, true);
27907 if (REG_P (chain))
27909 switch (REGNO (chain))
27911 case AX_REG:
27912 opcode = 0xb8; break;
27913 case CX_REG:
27914 opcode = 0xb9; break;
27915 default:
27916 gcc_unreachable ();
27919 else
27920 opcode = 0x68;
27922 mem = adjust_address (m_tramp, QImode, offset);
27923 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27925 mem = adjust_address (m_tramp, SImode, offset + 1);
27926 emit_move_insn (mem, chain_value);
27927 offset += 5;
27929 mem = adjust_address (m_tramp, QImode, offset);
27930 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27932 mem = adjust_address (m_tramp, SImode, offset + 1);
27934 /* Compute offset from the end of the jmp to the target function.
27935 In the case in which the trampoline stores the static chain on
27936 the stack, we need to skip the first insn which pushes the
27937 (call-saved) register static chain; this push is 1 byte. */
27938 offset += 5;
27939 disp = expand_binop (SImode, sub_optab, fnaddr,
27940 plus_constant (Pmode, XEXP (m_tramp, 0),
27941 offset - (MEM_P (chain) ? 1 : 0)),
27942 NULL_RTX, 1, OPTAB_DIRECT);
27943 emit_move_insn (mem, disp);
27946 gcc_assert (offset <= TRAMPOLINE_SIZE);
27948 #ifdef HAVE_ENABLE_EXECUTE_STACK
27949 #ifdef CHECK_EXECUTE_STACK_ENABLED
27950 if (CHECK_EXECUTE_STACK_ENABLED)
27951 #endif
27952 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27953 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27954 #endif
27957 /* The following file contains several enumerations and data structures
27958 built from the definitions in i386-builtin-types.def. */
27960 #include "i386-builtin-types.inc"
27962 /* Table for the ix86 builtin non-function types. */
27963 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27965 /* Retrieve an element from the above table, building some of
27966 the types lazily. */
27968 static tree
27969 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27971 unsigned int index;
27972 tree type, itype;
27974 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27976 type = ix86_builtin_type_tab[(int) tcode];
27977 if (type != NULL)
27978 return type;
27980 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27981 if (tcode <= IX86_BT_LAST_VECT)
27983 machine_mode mode;
27985 index = tcode - IX86_BT_LAST_PRIM - 1;
27986 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27987 mode = ix86_builtin_type_vect_mode[index];
27989 type = build_vector_type_for_mode (itype, mode);
27991 else
27993 int quals;
27995 index = tcode - IX86_BT_LAST_VECT - 1;
27996 if (tcode <= IX86_BT_LAST_PTR)
27997 quals = TYPE_UNQUALIFIED;
27998 else
27999 quals = TYPE_QUAL_CONST;
28001 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
28002 if (quals != TYPE_UNQUALIFIED)
28003 itype = build_qualified_type (itype, quals);
28005 type = build_pointer_type (itype);
28008 ix86_builtin_type_tab[(int) tcode] = type;
28009 return type;
28012 /* Table for the ix86 builtin function types. */
28013 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
28015 /* Retrieve an element from the above table, building some of
28016 the types lazily. */
28018 static tree
28019 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
28021 tree type;
28023 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
28025 type = ix86_builtin_func_type_tab[(int) tcode];
28026 if (type != NULL)
28027 return type;
28029 if (tcode <= IX86_BT_LAST_FUNC)
28031 unsigned start = ix86_builtin_func_start[(int) tcode];
28032 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
28033 tree rtype, atype, args = void_list_node;
28034 unsigned i;
28036 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
28037 for (i = after - 1; i > start; --i)
28039 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
28040 args = tree_cons (NULL, atype, args);
28043 type = build_function_type (rtype, args);
28045 else
28047 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
28048 enum ix86_builtin_func_type icode;
28050 icode = ix86_builtin_func_alias_base[index];
28051 type = ix86_get_builtin_func_type (icode);
28054 ix86_builtin_func_type_tab[(int) tcode] = type;
28055 return type;
28059 /* Codes for all the SSE/MMX builtins. */
28060 enum ix86_builtins
28062 IX86_BUILTIN_ADDPS,
28063 IX86_BUILTIN_ADDSS,
28064 IX86_BUILTIN_DIVPS,
28065 IX86_BUILTIN_DIVSS,
28066 IX86_BUILTIN_MULPS,
28067 IX86_BUILTIN_MULSS,
28068 IX86_BUILTIN_SUBPS,
28069 IX86_BUILTIN_SUBSS,
28071 IX86_BUILTIN_CMPEQPS,
28072 IX86_BUILTIN_CMPLTPS,
28073 IX86_BUILTIN_CMPLEPS,
28074 IX86_BUILTIN_CMPGTPS,
28075 IX86_BUILTIN_CMPGEPS,
28076 IX86_BUILTIN_CMPNEQPS,
28077 IX86_BUILTIN_CMPNLTPS,
28078 IX86_BUILTIN_CMPNLEPS,
28079 IX86_BUILTIN_CMPNGTPS,
28080 IX86_BUILTIN_CMPNGEPS,
28081 IX86_BUILTIN_CMPORDPS,
28082 IX86_BUILTIN_CMPUNORDPS,
28083 IX86_BUILTIN_CMPEQSS,
28084 IX86_BUILTIN_CMPLTSS,
28085 IX86_BUILTIN_CMPLESS,
28086 IX86_BUILTIN_CMPNEQSS,
28087 IX86_BUILTIN_CMPNLTSS,
28088 IX86_BUILTIN_CMPNLESS,
28089 IX86_BUILTIN_CMPORDSS,
28090 IX86_BUILTIN_CMPUNORDSS,
28092 IX86_BUILTIN_COMIEQSS,
28093 IX86_BUILTIN_COMILTSS,
28094 IX86_BUILTIN_COMILESS,
28095 IX86_BUILTIN_COMIGTSS,
28096 IX86_BUILTIN_COMIGESS,
28097 IX86_BUILTIN_COMINEQSS,
28098 IX86_BUILTIN_UCOMIEQSS,
28099 IX86_BUILTIN_UCOMILTSS,
28100 IX86_BUILTIN_UCOMILESS,
28101 IX86_BUILTIN_UCOMIGTSS,
28102 IX86_BUILTIN_UCOMIGESS,
28103 IX86_BUILTIN_UCOMINEQSS,
28105 IX86_BUILTIN_CVTPI2PS,
28106 IX86_BUILTIN_CVTPS2PI,
28107 IX86_BUILTIN_CVTSI2SS,
28108 IX86_BUILTIN_CVTSI642SS,
28109 IX86_BUILTIN_CVTSS2SI,
28110 IX86_BUILTIN_CVTSS2SI64,
28111 IX86_BUILTIN_CVTTPS2PI,
28112 IX86_BUILTIN_CVTTSS2SI,
28113 IX86_BUILTIN_CVTTSS2SI64,
28115 IX86_BUILTIN_MAXPS,
28116 IX86_BUILTIN_MAXSS,
28117 IX86_BUILTIN_MINPS,
28118 IX86_BUILTIN_MINSS,
28120 IX86_BUILTIN_LOADUPS,
28121 IX86_BUILTIN_STOREUPS,
28122 IX86_BUILTIN_MOVSS,
28124 IX86_BUILTIN_MOVHLPS,
28125 IX86_BUILTIN_MOVLHPS,
28126 IX86_BUILTIN_LOADHPS,
28127 IX86_BUILTIN_LOADLPS,
28128 IX86_BUILTIN_STOREHPS,
28129 IX86_BUILTIN_STORELPS,
28131 IX86_BUILTIN_MASKMOVQ,
28132 IX86_BUILTIN_MOVMSKPS,
28133 IX86_BUILTIN_PMOVMSKB,
28135 IX86_BUILTIN_MOVNTPS,
28136 IX86_BUILTIN_MOVNTQ,
28138 IX86_BUILTIN_LOADDQU,
28139 IX86_BUILTIN_STOREDQU,
28141 IX86_BUILTIN_PACKSSWB,
28142 IX86_BUILTIN_PACKSSDW,
28143 IX86_BUILTIN_PACKUSWB,
28145 IX86_BUILTIN_PADDB,
28146 IX86_BUILTIN_PADDW,
28147 IX86_BUILTIN_PADDD,
28148 IX86_BUILTIN_PADDQ,
28149 IX86_BUILTIN_PADDSB,
28150 IX86_BUILTIN_PADDSW,
28151 IX86_BUILTIN_PADDUSB,
28152 IX86_BUILTIN_PADDUSW,
28153 IX86_BUILTIN_PSUBB,
28154 IX86_BUILTIN_PSUBW,
28155 IX86_BUILTIN_PSUBD,
28156 IX86_BUILTIN_PSUBQ,
28157 IX86_BUILTIN_PSUBSB,
28158 IX86_BUILTIN_PSUBSW,
28159 IX86_BUILTIN_PSUBUSB,
28160 IX86_BUILTIN_PSUBUSW,
28162 IX86_BUILTIN_PAND,
28163 IX86_BUILTIN_PANDN,
28164 IX86_BUILTIN_POR,
28165 IX86_BUILTIN_PXOR,
28167 IX86_BUILTIN_PAVGB,
28168 IX86_BUILTIN_PAVGW,
28170 IX86_BUILTIN_PCMPEQB,
28171 IX86_BUILTIN_PCMPEQW,
28172 IX86_BUILTIN_PCMPEQD,
28173 IX86_BUILTIN_PCMPGTB,
28174 IX86_BUILTIN_PCMPGTW,
28175 IX86_BUILTIN_PCMPGTD,
28177 IX86_BUILTIN_PMADDWD,
28179 IX86_BUILTIN_PMAXSW,
28180 IX86_BUILTIN_PMAXUB,
28181 IX86_BUILTIN_PMINSW,
28182 IX86_BUILTIN_PMINUB,
28184 IX86_BUILTIN_PMULHUW,
28185 IX86_BUILTIN_PMULHW,
28186 IX86_BUILTIN_PMULLW,
28188 IX86_BUILTIN_PSADBW,
28189 IX86_BUILTIN_PSHUFW,
28191 IX86_BUILTIN_PSLLW,
28192 IX86_BUILTIN_PSLLD,
28193 IX86_BUILTIN_PSLLQ,
28194 IX86_BUILTIN_PSRAW,
28195 IX86_BUILTIN_PSRAD,
28196 IX86_BUILTIN_PSRLW,
28197 IX86_BUILTIN_PSRLD,
28198 IX86_BUILTIN_PSRLQ,
28199 IX86_BUILTIN_PSLLWI,
28200 IX86_BUILTIN_PSLLDI,
28201 IX86_BUILTIN_PSLLQI,
28202 IX86_BUILTIN_PSRAWI,
28203 IX86_BUILTIN_PSRADI,
28204 IX86_BUILTIN_PSRLWI,
28205 IX86_BUILTIN_PSRLDI,
28206 IX86_BUILTIN_PSRLQI,
28208 IX86_BUILTIN_PUNPCKHBW,
28209 IX86_BUILTIN_PUNPCKHWD,
28210 IX86_BUILTIN_PUNPCKHDQ,
28211 IX86_BUILTIN_PUNPCKLBW,
28212 IX86_BUILTIN_PUNPCKLWD,
28213 IX86_BUILTIN_PUNPCKLDQ,
28215 IX86_BUILTIN_SHUFPS,
28217 IX86_BUILTIN_RCPPS,
28218 IX86_BUILTIN_RCPSS,
28219 IX86_BUILTIN_RSQRTPS,
28220 IX86_BUILTIN_RSQRTPS_NR,
28221 IX86_BUILTIN_RSQRTSS,
28222 IX86_BUILTIN_RSQRTF,
28223 IX86_BUILTIN_SQRTPS,
28224 IX86_BUILTIN_SQRTPS_NR,
28225 IX86_BUILTIN_SQRTSS,
28227 IX86_BUILTIN_UNPCKHPS,
28228 IX86_BUILTIN_UNPCKLPS,
28230 IX86_BUILTIN_ANDPS,
28231 IX86_BUILTIN_ANDNPS,
28232 IX86_BUILTIN_ORPS,
28233 IX86_BUILTIN_XORPS,
28235 IX86_BUILTIN_EMMS,
28236 IX86_BUILTIN_LDMXCSR,
28237 IX86_BUILTIN_STMXCSR,
28238 IX86_BUILTIN_SFENCE,
28240 IX86_BUILTIN_FXSAVE,
28241 IX86_BUILTIN_FXRSTOR,
28242 IX86_BUILTIN_FXSAVE64,
28243 IX86_BUILTIN_FXRSTOR64,
28245 IX86_BUILTIN_XSAVE,
28246 IX86_BUILTIN_XRSTOR,
28247 IX86_BUILTIN_XSAVE64,
28248 IX86_BUILTIN_XRSTOR64,
28250 IX86_BUILTIN_XSAVEOPT,
28251 IX86_BUILTIN_XSAVEOPT64,
28253 IX86_BUILTIN_XSAVEC,
28254 IX86_BUILTIN_XSAVEC64,
28256 IX86_BUILTIN_XSAVES,
28257 IX86_BUILTIN_XRSTORS,
28258 IX86_BUILTIN_XSAVES64,
28259 IX86_BUILTIN_XRSTORS64,
28261 /* 3DNow! Original */
28262 IX86_BUILTIN_FEMMS,
28263 IX86_BUILTIN_PAVGUSB,
28264 IX86_BUILTIN_PF2ID,
28265 IX86_BUILTIN_PFACC,
28266 IX86_BUILTIN_PFADD,
28267 IX86_BUILTIN_PFCMPEQ,
28268 IX86_BUILTIN_PFCMPGE,
28269 IX86_BUILTIN_PFCMPGT,
28270 IX86_BUILTIN_PFMAX,
28271 IX86_BUILTIN_PFMIN,
28272 IX86_BUILTIN_PFMUL,
28273 IX86_BUILTIN_PFRCP,
28274 IX86_BUILTIN_PFRCPIT1,
28275 IX86_BUILTIN_PFRCPIT2,
28276 IX86_BUILTIN_PFRSQIT1,
28277 IX86_BUILTIN_PFRSQRT,
28278 IX86_BUILTIN_PFSUB,
28279 IX86_BUILTIN_PFSUBR,
28280 IX86_BUILTIN_PI2FD,
28281 IX86_BUILTIN_PMULHRW,
28283 /* 3DNow! Athlon Extensions */
28284 IX86_BUILTIN_PF2IW,
28285 IX86_BUILTIN_PFNACC,
28286 IX86_BUILTIN_PFPNACC,
28287 IX86_BUILTIN_PI2FW,
28288 IX86_BUILTIN_PSWAPDSI,
28289 IX86_BUILTIN_PSWAPDSF,
28291 /* SSE2 */
28292 IX86_BUILTIN_ADDPD,
28293 IX86_BUILTIN_ADDSD,
28294 IX86_BUILTIN_DIVPD,
28295 IX86_BUILTIN_DIVSD,
28296 IX86_BUILTIN_MULPD,
28297 IX86_BUILTIN_MULSD,
28298 IX86_BUILTIN_SUBPD,
28299 IX86_BUILTIN_SUBSD,
28301 IX86_BUILTIN_CMPEQPD,
28302 IX86_BUILTIN_CMPLTPD,
28303 IX86_BUILTIN_CMPLEPD,
28304 IX86_BUILTIN_CMPGTPD,
28305 IX86_BUILTIN_CMPGEPD,
28306 IX86_BUILTIN_CMPNEQPD,
28307 IX86_BUILTIN_CMPNLTPD,
28308 IX86_BUILTIN_CMPNLEPD,
28309 IX86_BUILTIN_CMPNGTPD,
28310 IX86_BUILTIN_CMPNGEPD,
28311 IX86_BUILTIN_CMPORDPD,
28312 IX86_BUILTIN_CMPUNORDPD,
28313 IX86_BUILTIN_CMPEQSD,
28314 IX86_BUILTIN_CMPLTSD,
28315 IX86_BUILTIN_CMPLESD,
28316 IX86_BUILTIN_CMPNEQSD,
28317 IX86_BUILTIN_CMPNLTSD,
28318 IX86_BUILTIN_CMPNLESD,
28319 IX86_BUILTIN_CMPORDSD,
28320 IX86_BUILTIN_CMPUNORDSD,
28322 IX86_BUILTIN_COMIEQSD,
28323 IX86_BUILTIN_COMILTSD,
28324 IX86_BUILTIN_COMILESD,
28325 IX86_BUILTIN_COMIGTSD,
28326 IX86_BUILTIN_COMIGESD,
28327 IX86_BUILTIN_COMINEQSD,
28328 IX86_BUILTIN_UCOMIEQSD,
28329 IX86_BUILTIN_UCOMILTSD,
28330 IX86_BUILTIN_UCOMILESD,
28331 IX86_BUILTIN_UCOMIGTSD,
28332 IX86_BUILTIN_UCOMIGESD,
28333 IX86_BUILTIN_UCOMINEQSD,
28335 IX86_BUILTIN_MAXPD,
28336 IX86_BUILTIN_MAXSD,
28337 IX86_BUILTIN_MINPD,
28338 IX86_BUILTIN_MINSD,
28340 IX86_BUILTIN_ANDPD,
28341 IX86_BUILTIN_ANDNPD,
28342 IX86_BUILTIN_ORPD,
28343 IX86_BUILTIN_XORPD,
28345 IX86_BUILTIN_SQRTPD,
28346 IX86_BUILTIN_SQRTSD,
28348 IX86_BUILTIN_UNPCKHPD,
28349 IX86_BUILTIN_UNPCKLPD,
28351 IX86_BUILTIN_SHUFPD,
28353 IX86_BUILTIN_LOADUPD,
28354 IX86_BUILTIN_STOREUPD,
28355 IX86_BUILTIN_MOVSD,
28357 IX86_BUILTIN_LOADHPD,
28358 IX86_BUILTIN_LOADLPD,
28360 IX86_BUILTIN_CVTDQ2PD,
28361 IX86_BUILTIN_CVTDQ2PS,
28363 IX86_BUILTIN_CVTPD2DQ,
28364 IX86_BUILTIN_CVTPD2PI,
28365 IX86_BUILTIN_CVTPD2PS,
28366 IX86_BUILTIN_CVTTPD2DQ,
28367 IX86_BUILTIN_CVTTPD2PI,
28369 IX86_BUILTIN_CVTPI2PD,
28370 IX86_BUILTIN_CVTSI2SD,
28371 IX86_BUILTIN_CVTSI642SD,
28373 IX86_BUILTIN_CVTSD2SI,
28374 IX86_BUILTIN_CVTSD2SI64,
28375 IX86_BUILTIN_CVTSD2SS,
28376 IX86_BUILTIN_CVTSS2SD,
28377 IX86_BUILTIN_CVTTSD2SI,
28378 IX86_BUILTIN_CVTTSD2SI64,
28380 IX86_BUILTIN_CVTPS2DQ,
28381 IX86_BUILTIN_CVTPS2PD,
28382 IX86_BUILTIN_CVTTPS2DQ,
28384 IX86_BUILTIN_MOVNTI,
28385 IX86_BUILTIN_MOVNTI64,
28386 IX86_BUILTIN_MOVNTPD,
28387 IX86_BUILTIN_MOVNTDQ,
28389 IX86_BUILTIN_MOVQ128,
28391 /* SSE2 MMX */
28392 IX86_BUILTIN_MASKMOVDQU,
28393 IX86_BUILTIN_MOVMSKPD,
28394 IX86_BUILTIN_PMOVMSKB128,
28396 IX86_BUILTIN_PACKSSWB128,
28397 IX86_BUILTIN_PACKSSDW128,
28398 IX86_BUILTIN_PACKUSWB128,
28400 IX86_BUILTIN_PADDB128,
28401 IX86_BUILTIN_PADDW128,
28402 IX86_BUILTIN_PADDD128,
28403 IX86_BUILTIN_PADDQ128,
28404 IX86_BUILTIN_PADDSB128,
28405 IX86_BUILTIN_PADDSW128,
28406 IX86_BUILTIN_PADDUSB128,
28407 IX86_BUILTIN_PADDUSW128,
28408 IX86_BUILTIN_PSUBB128,
28409 IX86_BUILTIN_PSUBW128,
28410 IX86_BUILTIN_PSUBD128,
28411 IX86_BUILTIN_PSUBQ128,
28412 IX86_BUILTIN_PSUBSB128,
28413 IX86_BUILTIN_PSUBSW128,
28414 IX86_BUILTIN_PSUBUSB128,
28415 IX86_BUILTIN_PSUBUSW128,
28417 IX86_BUILTIN_PAND128,
28418 IX86_BUILTIN_PANDN128,
28419 IX86_BUILTIN_POR128,
28420 IX86_BUILTIN_PXOR128,
28422 IX86_BUILTIN_PAVGB128,
28423 IX86_BUILTIN_PAVGW128,
28425 IX86_BUILTIN_PCMPEQB128,
28426 IX86_BUILTIN_PCMPEQW128,
28427 IX86_BUILTIN_PCMPEQD128,
28428 IX86_BUILTIN_PCMPGTB128,
28429 IX86_BUILTIN_PCMPGTW128,
28430 IX86_BUILTIN_PCMPGTD128,
28432 IX86_BUILTIN_PMADDWD128,
28434 IX86_BUILTIN_PMAXSW128,
28435 IX86_BUILTIN_PMAXUB128,
28436 IX86_BUILTIN_PMINSW128,
28437 IX86_BUILTIN_PMINUB128,
28439 IX86_BUILTIN_PMULUDQ,
28440 IX86_BUILTIN_PMULUDQ128,
28441 IX86_BUILTIN_PMULHUW128,
28442 IX86_BUILTIN_PMULHW128,
28443 IX86_BUILTIN_PMULLW128,
28445 IX86_BUILTIN_PSADBW128,
28446 IX86_BUILTIN_PSHUFHW,
28447 IX86_BUILTIN_PSHUFLW,
28448 IX86_BUILTIN_PSHUFD,
28450 IX86_BUILTIN_PSLLDQI128,
28451 IX86_BUILTIN_PSLLWI128,
28452 IX86_BUILTIN_PSLLDI128,
28453 IX86_BUILTIN_PSLLQI128,
28454 IX86_BUILTIN_PSRAWI128,
28455 IX86_BUILTIN_PSRADI128,
28456 IX86_BUILTIN_PSRLDQI128,
28457 IX86_BUILTIN_PSRLWI128,
28458 IX86_BUILTIN_PSRLDI128,
28459 IX86_BUILTIN_PSRLQI128,
28461 IX86_BUILTIN_PSLLDQ128,
28462 IX86_BUILTIN_PSLLW128,
28463 IX86_BUILTIN_PSLLD128,
28464 IX86_BUILTIN_PSLLQ128,
28465 IX86_BUILTIN_PSRAW128,
28466 IX86_BUILTIN_PSRAD128,
28467 IX86_BUILTIN_PSRLW128,
28468 IX86_BUILTIN_PSRLD128,
28469 IX86_BUILTIN_PSRLQ128,
28471 IX86_BUILTIN_PUNPCKHBW128,
28472 IX86_BUILTIN_PUNPCKHWD128,
28473 IX86_BUILTIN_PUNPCKHDQ128,
28474 IX86_BUILTIN_PUNPCKHQDQ128,
28475 IX86_BUILTIN_PUNPCKLBW128,
28476 IX86_BUILTIN_PUNPCKLWD128,
28477 IX86_BUILTIN_PUNPCKLDQ128,
28478 IX86_BUILTIN_PUNPCKLQDQ128,
28480 IX86_BUILTIN_CLFLUSH,
28481 IX86_BUILTIN_MFENCE,
28482 IX86_BUILTIN_LFENCE,
28483 IX86_BUILTIN_PAUSE,
28485 IX86_BUILTIN_FNSTENV,
28486 IX86_BUILTIN_FLDENV,
28487 IX86_BUILTIN_FNSTSW,
28488 IX86_BUILTIN_FNCLEX,
28490 IX86_BUILTIN_BSRSI,
28491 IX86_BUILTIN_BSRDI,
28492 IX86_BUILTIN_RDPMC,
28493 IX86_BUILTIN_RDTSC,
28494 IX86_BUILTIN_RDTSCP,
28495 IX86_BUILTIN_ROLQI,
28496 IX86_BUILTIN_ROLHI,
28497 IX86_BUILTIN_RORQI,
28498 IX86_BUILTIN_RORHI,
28500 /* SSE3. */
28501 IX86_BUILTIN_ADDSUBPS,
28502 IX86_BUILTIN_HADDPS,
28503 IX86_BUILTIN_HSUBPS,
28504 IX86_BUILTIN_MOVSHDUP,
28505 IX86_BUILTIN_MOVSLDUP,
28506 IX86_BUILTIN_ADDSUBPD,
28507 IX86_BUILTIN_HADDPD,
28508 IX86_BUILTIN_HSUBPD,
28509 IX86_BUILTIN_LDDQU,
28511 IX86_BUILTIN_MONITOR,
28512 IX86_BUILTIN_MWAIT,
28514 /* SSSE3. */
28515 IX86_BUILTIN_PHADDW,
28516 IX86_BUILTIN_PHADDD,
28517 IX86_BUILTIN_PHADDSW,
28518 IX86_BUILTIN_PHSUBW,
28519 IX86_BUILTIN_PHSUBD,
28520 IX86_BUILTIN_PHSUBSW,
28521 IX86_BUILTIN_PMADDUBSW,
28522 IX86_BUILTIN_PMULHRSW,
28523 IX86_BUILTIN_PSHUFB,
28524 IX86_BUILTIN_PSIGNB,
28525 IX86_BUILTIN_PSIGNW,
28526 IX86_BUILTIN_PSIGND,
28527 IX86_BUILTIN_PALIGNR,
28528 IX86_BUILTIN_PABSB,
28529 IX86_BUILTIN_PABSW,
28530 IX86_BUILTIN_PABSD,
28532 IX86_BUILTIN_PHADDW128,
28533 IX86_BUILTIN_PHADDD128,
28534 IX86_BUILTIN_PHADDSW128,
28535 IX86_BUILTIN_PHSUBW128,
28536 IX86_BUILTIN_PHSUBD128,
28537 IX86_BUILTIN_PHSUBSW128,
28538 IX86_BUILTIN_PMADDUBSW128,
28539 IX86_BUILTIN_PMULHRSW128,
28540 IX86_BUILTIN_PSHUFB128,
28541 IX86_BUILTIN_PSIGNB128,
28542 IX86_BUILTIN_PSIGNW128,
28543 IX86_BUILTIN_PSIGND128,
28544 IX86_BUILTIN_PALIGNR128,
28545 IX86_BUILTIN_PABSB128,
28546 IX86_BUILTIN_PABSW128,
28547 IX86_BUILTIN_PABSD128,
28549 /* AMDFAM10 - SSE4A New Instructions. */
28550 IX86_BUILTIN_MOVNTSD,
28551 IX86_BUILTIN_MOVNTSS,
28552 IX86_BUILTIN_EXTRQI,
28553 IX86_BUILTIN_EXTRQ,
28554 IX86_BUILTIN_INSERTQI,
28555 IX86_BUILTIN_INSERTQ,
28557 /* SSE4.1. */
28558 IX86_BUILTIN_BLENDPD,
28559 IX86_BUILTIN_BLENDPS,
28560 IX86_BUILTIN_BLENDVPD,
28561 IX86_BUILTIN_BLENDVPS,
28562 IX86_BUILTIN_PBLENDVB128,
28563 IX86_BUILTIN_PBLENDW128,
28565 IX86_BUILTIN_DPPD,
28566 IX86_BUILTIN_DPPS,
28568 IX86_BUILTIN_INSERTPS128,
28570 IX86_BUILTIN_MOVNTDQA,
28571 IX86_BUILTIN_MPSADBW128,
28572 IX86_BUILTIN_PACKUSDW128,
28573 IX86_BUILTIN_PCMPEQQ,
28574 IX86_BUILTIN_PHMINPOSUW128,
28576 IX86_BUILTIN_PMAXSB128,
28577 IX86_BUILTIN_PMAXSD128,
28578 IX86_BUILTIN_PMAXUD128,
28579 IX86_BUILTIN_PMAXUW128,
28581 IX86_BUILTIN_PMINSB128,
28582 IX86_BUILTIN_PMINSD128,
28583 IX86_BUILTIN_PMINUD128,
28584 IX86_BUILTIN_PMINUW128,
28586 IX86_BUILTIN_PMOVSXBW128,
28587 IX86_BUILTIN_PMOVSXBD128,
28588 IX86_BUILTIN_PMOVSXBQ128,
28589 IX86_BUILTIN_PMOVSXWD128,
28590 IX86_BUILTIN_PMOVSXWQ128,
28591 IX86_BUILTIN_PMOVSXDQ128,
28593 IX86_BUILTIN_PMOVZXBW128,
28594 IX86_BUILTIN_PMOVZXBD128,
28595 IX86_BUILTIN_PMOVZXBQ128,
28596 IX86_BUILTIN_PMOVZXWD128,
28597 IX86_BUILTIN_PMOVZXWQ128,
28598 IX86_BUILTIN_PMOVZXDQ128,
28600 IX86_BUILTIN_PMULDQ128,
28601 IX86_BUILTIN_PMULLD128,
28603 IX86_BUILTIN_ROUNDSD,
28604 IX86_BUILTIN_ROUNDSS,
28606 IX86_BUILTIN_ROUNDPD,
28607 IX86_BUILTIN_ROUNDPS,
28609 IX86_BUILTIN_FLOORPD,
28610 IX86_BUILTIN_CEILPD,
28611 IX86_BUILTIN_TRUNCPD,
28612 IX86_BUILTIN_RINTPD,
28613 IX86_BUILTIN_ROUNDPD_AZ,
28615 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28616 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28617 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28619 IX86_BUILTIN_FLOORPS,
28620 IX86_BUILTIN_CEILPS,
28621 IX86_BUILTIN_TRUNCPS,
28622 IX86_BUILTIN_RINTPS,
28623 IX86_BUILTIN_ROUNDPS_AZ,
28625 IX86_BUILTIN_FLOORPS_SFIX,
28626 IX86_BUILTIN_CEILPS_SFIX,
28627 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28629 IX86_BUILTIN_PTESTZ,
28630 IX86_BUILTIN_PTESTC,
28631 IX86_BUILTIN_PTESTNZC,
28633 IX86_BUILTIN_VEC_INIT_V2SI,
28634 IX86_BUILTIN_VEC_INIT_V4HI,
28635 IX86_BUILTIN_VEC_INIT_V8QI,
28636 IX86_BUILTIN_VEC_EXT_V2DF,
28637 IX86_BUILTIN_VEC_EXT_V2DI,
28638 IX86_BUILTIN_VEC_EXT_V4SF,
28639 IX86_BUILTIN_VEC_EXT_V4SI,
28640 IX86_BUILTIN_VEC_EXT_V8HI,
28641 IX86_BUILTIN_VEC_EXT_V2SI,
28642 IX86_BUILTIN_VEC_EXT_V4HI,
28643 IX86_BUILTIN_VEC_EXT_V16QI,
28644 IX86_BUILTIN_VEC_SET_V2DI,
28645 IX86_BUILTIN_VEC_SET_V4SF,
28646 IX86_BUILTIN_VEC_SET_V4SI,
28647 IX86_BUILTIN_VEC_SET_V8HI,
28648 IX86_BUILTIN_VEC_SET_V4HI,
28649 IX86_BUILTIN_VEC_SET_V16QI,
28651 IX86_BUILTIN_VEC_PACK_SFIX,
28652 IX86_BUILTIN_VEC_PACK_SFIX256,
28654 /* SSE4.2. */
28655 IX86_BUILTIN_CRC32QI,
28656 IX86_BUILTIN_CRC32HI,
28657 IX86_BUILTIN_CRC32SI,
28658 IX86_BUILTIN_CRC32DI,
28660 IX86_BUILTIN_PCMPESTRI128,
28661 IX86_BUILTIN_PCMPESTRM128,
28662 IX86_BUILTIN_PCMPESTRA128,
28663 IX86_BUILTIN_PCMPESTRC128,
28664 IX86_BUILTIN_PCMPESTRO128,
28665 IX86_BUILTIN_PCMPESTRS128,
28666 IX86_BUILTIN_PCMPESTRZ128,
28667 IX86_BUILTIN_PCMPISTRI128,
28668 IX86_BUILTIN_PCMPISTRM128,
28669 IX86_BUILTIN_PCMPISTRA128,
28670 IX86_BUILTIN_PCMPISTRC128,
28671 IX86_BUILTIN_PCMPISTRO128,
28672 IX86_BUILTIN_PCMPISTRS128,
28673 IX86_BUILTIN_PCMPISTRZ128,
28675 IX86_BUILTIN_PCMPGTQ,
28677 /* AES instructions */
28678 IX86_BUILTIN_AESENC128,
28679 IX86_BUILTIN_AESENCLAST128,
28680 IX86_BUILTIN_AESDEC128,
28681 IX86_BUILTIN_AESDECLAST128,
28682 IX86_BUILTIN_AESIMC128,
28683 IX86_BUILTIN_AESKEYGENASSIST128,
28685 /* PCLMUL instruction */
28686 IX86_BUILTIN_PCLMULQDQ128,
28688 /* AVX */
28689 IX86_BUILTIN_ADDPD256,
28690 IX86_BUILTIN_ADDPS256,
28691 IX86_BUILTIN_ADDSUBPD256,
28692 IX86_BUILTIN_ADDSUBPS256,
28693 IX86_BUILTIN_ANDPD256,
28694 IX86_BUILTIN_ANDPS256,
28695 IX86_BUILTIN_ANDNPD256,
28696 IX86_BUILTIN_ANDNPS256,
28697 IX86_BUILTIN_BLENDPD256,
28698 IX86_BUILTIN_BLENDPS256,
28699 IX86_BUILTIN_BLENDVPD256,
28700 IX86_BUILTIN_BLENDVPS256,
28701 IX86_BUILTIN_DIVPD256,
28702 IX86_BUILTIN_DIVPS256,
28703 IX86_BUILTIN_DPPS256,
28704 IX86_BUILTIN_HADDPD256,
28705 IX86_BUILTIN_HADDPS256,
28706 IX86_BUILTIN_HSUBPD256,
28707 IX86_BUILTIN_HSUBPS256,
28708 IX86_BUILTIN_MAXPD256,
28709 IX86_BUILTIN_MAXPS256,
28710 IX86_BUILTIN_MINPD256,
28711 IX86_BUILTIN_MINPS256,
28712 IX86_BUILTIN_MULPD256,
28713 IX86_BUILTIN_MULPS256,
28714 IX86_BUILTIN_ORPD256,
28715 IX86_BUILTIN_ORPS256,
28716 IX86_BUILTIN_SHUFPD256,
28717 IX86_BUILTIN_SHUFPS256,
28718 IX86_BUILTIN_SUBPD256,
28719 IX86_BUILTIN_SUBPS256,
28720 IX86_BUILTIN_XORPD256,
28721 IX86_BUILTIN_XORPS256,
28722 IX86_BUILTIN_CMPSD,
28723 IX86_BUILTIN_CMPSS,
28724 IX86_BUILTIN_CMPPD,
28725 IX86_BUILTIN_CMPPS,
28726 IX86_BUILTIN_CMPPD256,
28727 IX86_BUILTIN_CMPPS256,
28728 IX86_BUILTIN_CVTDQ2PD256,
28729 IX86_BUILTIN_CVTDQ2PS256,
28730 IX86_BUILTIN_CVTPD2PS256,
28731 IX86_BUILTIN_CVTPS2DQ256,
28732 IX86_BUILTIN_CVTPS2PD256,
28733 IX86_BUILTIN_CVTTPD2DQ256,
28734 IX86_BUILTIN_CVTPD2DQ256,
28735 IX86_BUILTIN_CVTTPS2DQ256,
28736 IX86_BUILTIN_EXTRACTF128PD256,
28737 IX86_BUILTIN_EXTRACTF128PS256,
28738 IX86_BUILTIN_EXTRACTF128SI256,
28739 IX86_BUILTIN_VZEROALL,
28740 IX86_BUILTIN_VZEROUPPER,
28741 IX86_BUILTIN_VPERMILVARPD,
28742 IX86_BUILTIN_VPERMILVARPS,
28743 IX86_BUILTIN_VPERMILVARPD256,
28744 IX86_BUILTIN_VPERMILVARPS256,
28745 IX86_BUILTIN_VPERMILPD,
28746 IX86_BUILTIN_VPERMILPS,
28747 IX86_BUILTIN_VPERMILPD256,
28748 IX86_BUILTIN_VPERMILPS256,
28749 IX86_BUILTIN_VPERMIL2PD,
28750 IX86_BUILTIN_VPERMIL2PS,
28751 IX86_BUILTIN_VPERMIL2PD256,
28752 IX86_BUILTIN_VPERMIL2PS256,
28753 IX86_BUILTIN_VPERM2F128PD256,
28754 IX86_BUILTIN_VPERM2F128PS256,
28755 IX86_BUILTIN_VPERM2F128SI256,
28756 IX86_BUILTIN_VBROADCASTSS,
28757 IX86_BUILTIN_VBROADCASTSD256,
28758 IX86_BUILTIN_VBROADCASTSS256,
28759 IX86_BUILTIN_VBROADCASTPD256,
28760 IX86_BUILTIN_VBROADCASTPS256,
28761 IX86_BUILTIN_VINSERTF128PD256,
28762 IX86_BUILTIN_VINSERTF128PS256,
28763 IX86_BUILTIN_VINSERTF128SI256,
28764 IX86_BUILTIN_LOADUPD256,
28765 IX86_BUILTIN_LOADUPS256,
28766 IX86_BUILTIN_STOREUPD256,
28767 IX86_BUILTIN_STOREUPS256,
28768 IX86_BUILTIN_LDDQU256,
28769 IX86_BUILTIN_MOVNTDQ256,
28770 IX86_BUILTIN_MOVNTPD256,
28771 IX86_BUILTIN_MOVNTPS256,
28772 IX86_BUILTIN_LOADDQU256,
28773 IX86_BUILTIN_STOREDQU256,
28774 IX86_BUILTIN_MASKLOADPD,
28775 IX86_BUILTIN_MASKLOADPS,
28776 IX86_BUILTIN_MASKSTOREPD,
28777 IX86_BUILTIN_MASKSTOREPS,
28778 IX86_BUILTIN_MASKLOADPD256,
28779 IX86_BUILTIN_MASKLOADPS256,
28780 IX86_BUILTIN_MASKSTOREPD256,
28781 IX86_BUILTIN_MASKSTOREPS256,
28782 IX86_BUILTIN_MOVSHDUP256,
28783 IX86_BUILTIN_MOVSLDUP256,
28784 IX86_BUILTIN_MOVDDUP256,
28786 IX86_BUILTIN_SQRTPD256,
28787 IX86_BUILTIN_SQRTPS256,
28788 IX86_BUILTIN_SQRTPS_NR256,
28789 IX86_BUILTIN_RSQRTPS256,
28790 IX86_BUILTIN_RSQRTPS_NR256,
28792 IX86_BUILTIN_RCPPS256,
28794 IX86_BUILTIN_ROUNDPD256,
28795 IX86_BUILTIN_ROUNDPS256,
28797 IX86_BUILTIN_FLOORPD256,
28798 IX86_BUILTIN_CEILPD256,
28799 IX86_BUILTIN_TRUNCPD256,
28800 IX86_BUILTIN_RINTPD256,
28801 IX86_BUILTIN_ROUNDPD_AZ256,
28803 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28804 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28805 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28807 IX86_BUILTIN_FLOORPS256,
28808 IX86_BUILTIN_CEILPS256,
28809 IX86_BUILTIN_TRUNCPS256,
28810 IX86_BUILTIN_RINTPS256,
28811 IX86_BUILTIN_ROUNDPS_AZ256,
28813 IX86_BUILTIN_FLOORPS_SFIX256,
28814 IX86_BUILTIN_CEILPS_SFIX256,
28815 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28817 IX86_BUILTIN_UNPCKHPD256,
28818 IX86_BUILTIN_UNPCKLPD256,
28819 IX86_BUILTIN_UNPCKHPS256,
28820 IX86_BUILTIN_UNPCKLPS256,
28822 IX86_BUILTIN_SI256_SI,
28823 IX86_BUILTIN_PS256_PS,
28824 IX86_BUILTIN_PD256_PD,
28825 IX86_BUILTIN_SI_SI256,
28826 IX86_BUILTIN_PS_PS256,
28827 IX86_BUILTIN_PD_PD256,
28829 IX86_BUILTIN_VTESTZPD,
28830 IX86_BUILTIN_VTESTCPD,
28831 IX86_BUILTIN_VTESTNZCPD,
28832 IX86_BUILTIN_VTESTZPS,
28833 IX86_BUILTIN_VTESTCPS,
28834 IX86_BUILTIN_VTESTNZCPS,
28835 IX86_BUILTIN_VTESTZPD256,
28836 IX86_BUILTIN_VTESTCPD256,
28837 IX86_BUILTIN_VTESTNZCPD256,
28838 IX86_BUILTIN_VTESTZPS256,
28839 IX86_BUILTIN_VTESTCPS256,
28840 IX86_BUILTIN_VTESTNZCPS256,
28841 IX86_BUILTIN_PTESTZ256,
28842 IX86_BUILTIN_PTESTC256,
28843 IX86_BUILTIN_PTESTNZC256,
28845 IX86_BUILTIN_MOVMSKPD256,
28846 IX86_BUILTIN_MOVMSKPS256,
28848 /* AVX2 */
28849 IX86_BUILTIN_MPSADBW256,
28850 IX86_BUILTIN_PABSB256,
28851 IX86_BUILTIN_PABSW256,
28852 IX86_BUILTIN_PABSD256,
28853 IX86_BUILTIN_PACKSSDW256,
28854 IX86_BUILTIN_PACKSSWB256,
28855 IX86_BUILTIN_PACKUSDW256,
28856 IX86_BUILTIN_PACKUSWB256,
28857 IX86_BUILTIN_PADDB256,
28858 IX86_BUILTIN_PADDW256,
28859 IX86_BUILTIN_PADDD256,
28860 IX86_BUILTIN_PADDQ256,
28861 IX86_BUILTIN_PADDSB256,
28862 IX86_BUILTIN_PADDSW256,
28863 IX86_BUILTIN_PADDUSB256,
28864 IX86_BUILTIN_PADDUSW256,
28865 IX86_BUILTIN_PALIGNR256,
28866 IX86_BUILTIN_AND256I,
28867 IX86_BUILTIN_ANDNOT256I,
28868 IX86_BUILTIN_PAVGB256,
28869 IX86_BUILTIN_PAVGW256,
28870 IX86_BUILTIN_PBLENDVB256,
28871 IX86_BUILTIN_PBLENDVW256,
28872 IX86_BUILTIN_PCMPEQB256,
28873 IX86_BUILTIN_PCMPEQW256,
28874 IX86_BUILTIN_PCMPEQD256,
28875 IX86_BUILTIN_PCMPEQQ256,
28876 IX86_BUILTIN_PCMPGTB256,
28877 IX86_BUILTIN_PCMPGTW256,
28878 IX86_BUILTIN_PCMPGTD256,
28879 IX86_BUILTIN_PCMPGTQ256,
28880 IX86_BUILTIN_PHADDW256,
28881 IX86_BUILTIN_PHADDD256,
28882 IX86_BUILTIN_PHADDSW256,
28883 IX86_BUILTIN_PHSUBW256,
28884 IX86_BUILTIN_PHSUBD256,
28885 IX86_BUILTIN_PHSUBSW256,
28886 IX86_BUILTIN_PMADDUBSW256,
28887 IX86_BUILTIN_PMADDWD256,
28888 IX86_BUILTIN_PMAXSB256,
28889 IX86_BUILTIN_PMAXSW256,
28890 IX86_BUILTIN_PMAXSD256,
28891 IX86_BUILTIN_PMAXUB256,
28892 IX86_BUILTIN_PMAXUW256,
28893 IX86_BUILTIN_PMAXUD256,
28894 IX86_BUILTIN_PMINSB256,
28895 IX86_BUILTIN_PMINSW256,
28896 IX86_BUILTIN_PMINSD256,
28897 IX86_BUILTIN_PMINUB256,
28898 IX86_BUILTIN_PMINUW256,
28899 IX86_BUILTIN_PMINUD256,
28900 IX86_BUILTIN_PMOVMSKB256,
28901 IX86_BUILTIN_PMOVSXBW256,
28902 IX86_BUILTIN_PMOVSXBD256,
28903 IX86_BUILTIN_PMOVSXBQ256,
28904 IX86_BUILTIN_PMOVSXWD256,
28905 IX86_BUILTIN_PMOVSXWQ256,
28906 IX86_BUILTIN_PMOVSXDQ256,
28907 IX86_BUILTIN_PMOVZXBW256,
28908 IX86_BUILTIN_PMOVZXBD256,
28909 IX86_BUILTIN_PMOVZXBQ256,
28910 IX86_BUILTIN_PMOVZXWD256,
28911 IX86_BUILTIN_PMOVZXWQ256,
28912 IX86_BUILTIN_PMOVZXDQ256,
28913 IX86_BUILTIN_PMULDQ256,
28914 IX86_BUILTIN_PMULHRSW256,
28915 IX86_BUILTIN_PMULHUW256,
28916 IX86_BUILTIN_PMULHW256,
28917 IX86_BUILTIN_PMULLW256,
28918 IX86_BUILTIN_PMULLD256,
28919 IX86_BUILTIN_PMULUDQ256,
28920 IX86_BUILTIN_POR256,
28921 IX86_BUILTIN_PSADBW256,
28922 IX86_BUILTIN_PSHUFB256,
28923 IX86_BUILTIN_PSHUFD256,
28924 IX86_BUILTIN_PSHUFHW256,
28925 IX86_BUILTIN_PSHUFLW256,
28926 IX86_BUILTIN_PSIGNB256,
28927 IX86_BUILTIN_PSIGNW256,
28928 IX86_BUILTIN_PSIGND256,
28929 IX86_BUILTIN_PSLLDQI256,
28930 IX86_BUILTIN_PSLLWI256,
28931 IX86_BUILTIN_PSLLW256,
28932 IX86_BUILTIN_PSLLDI256,
28933 IX86_BUILTIN_PSLLD256,
28934 IX86_BUILTIN_PSLLQI256,
28935 IX86_BUILTIN_PSLLQ256,
28936 IX86_BUILTIN_PSRAWI256,
28937 IX86_BUILTIN_PSRAW256,
28938 IX86_BUILTIN_PSRADI256,
28939 IX86_BUILTIN_PSRAD256,
28940 IX86_BUILTIN_PSRLDQI256,
28941 IX86_BUILTIN_PSRLWI256,
28942 IX86_BUILTIN_PSRLW256,
28943 IX86_BUILTIN_PSRLDI256,
28944 IX86_BUILTIN_PSRLD256,
28945 IX86_BUILTIN_PSRLQI256,
28946 IX86_BUILTIN_PSRLQ256,
28947 IX86_BUILTIN_PSUBB256,
28948 IX86_BUILTIN_PSUBW256,
28949 IX86_BUILTIN_PSUBD256,
28950 IX86_BUILTIN_PSUBQ256,
28951 IX86_BUILTIN_PSUBSB256,
28952 IX86_BUILTIN_PSUBSW256,
28953 IX86_BUILTIN_PSUBUSB256,
28954 IX86_BUILTIN_PSUBUSW256,
28955 IX86_BUILTIN_PUNPCKHBW256,
28956 IX86_BUILTIN_PUNPCKHWD256,
28957 IX86_BUILTIN_PUNPCKHDQ256,
28958 IX86_BUILTIN_PUNPCKHQDQ256,
28959 IX86_BUILTIN_PUNPCKLBW256,
28960 IX86_BUILTIN_PUNPCKLWD256,
28961 IX86_BUILTIN_PUNPCKLDQ256,
28962 IX86_BUILTIN_PUNPCKLQDQ256,
28963 IX86_BUILTIN_PXOR256,
28964 IX86_BUILTIN_MOVNTDQA256,
28965 IX86_BUILTIN_VBROADCASTSS_PS,
28966 IX86_BUILTIN_VBROADCASTSS_PS256,
28967 IX86_BUILTIN_VBROADCASTSD_PD256,
28968 IX86_BUILTIN_VBROADCASTSI256,
28969 IX86_BUILTIN_PBLENDD256,
28970 IX86_BUILTIN_PBLENDD128,
28971 IX86_BUILTIN_PBROADCASTB256,
28972 IX86_BUILTIN_PBROADCASTW256,
28973 IX86_BUILTIN_PBROADCASTD256,
28974 IX86_BUILTIN_PBROADCASTQ256,
28975 IX86_BUILTIN_PBROADCASTB128,
28976 IX86_BUILTIN_PBROADCASTW128,
28977 IX86_BUILTIN_PBROADCASTD128,
28978 IX86_BUILTIN_PBROADCASTQ128,
28979 IX86_BUILTIN_VPERMVARSI256,
28980 IX86_BUILTIN_VPERMDF256,
28981 IX86_BUILTIN_VPERMVARSF256,
28982 IX86_BUILTIN_VPERMDI256,
28983 IX86_BUILTIN_VPERMTI256,
28984 IX86_BUILTIN_VEXTRACT128I256,
28985 IX86_BUILTIN_VINSERT128I256,
28986 IX86_BUILTIN_MASKLOADD,
28987 IX86_BUILTIN_MASKLOADQ,
28988 IX86_BUILTIN_MASKLOADD256,
28989 IX86_BUILTIN_MASKLOADQ256,
28990 IX86_BUILTIN_MASKSTORED,
28991 IX86_BUILTIN_MASKSTOREQ,
28992 IX86_BUILTIN_MASKSTORED256,
28993 IX86_BUILTIN_MASKSTOREQ256,
28994 IX86_BUILTIN_PSLLVV4DI,
28995 IX86_BUILTIN_PSLLVV2DI,
28996 IX86_BUILTIN_PSLLVV8SI,
28997 IX86_BUILTIN_PSLLVV4SI,
28998 IX86_BUILTIN_PSRAVV8SI,
28999 IX86_BUILTIN_PSRAVV4SI,
29000 IX86_BUILTIN_PSRLVV4DI,
29001 IX86_BUILTIN_PSRLVV2DI,
29002 IX86_BUILTIN_PSRLVV8SI,
29003 IX86_BUILTIN_PSRLVV4SI,
29005 IX86_BUILTIN_GATHERSIV2DF,
29006 IX86_BUILTIN_GATHERSIV4DF,
29007 IX86_BUILTIN_GATHERDIV2DF,
29008 IX86_BUILTIN_GATHERDIV4DF,
29009 IX86_BUILTIN_GATHERSIV4SF,
29010 IX86_BUILTIN_GATHERSIV8SF,
29011 IX86_BUILTIN_GATHERDIV4SF,
29012 IX86_BUILTIN_GATHERDIV8SF,
29013 IX86_BUILTIN_GATHERSIV2DI,
29014 IX86_BUILTIN_GATHERSIV4DI,
29015 IX86_BUILTIN_GATHERDIV2DI,
29016 IX86_BUILTIN_GATHERDIV4DI,
29017 IX86_BUILTIN_GATHERSIV4SI,
29018 IX86_BUILTIN_GATHERSIV8SI,
29019 IX86_BUILTIN_GATHERDIV4SI,
29020 IX86_BUILTIN_GATHERDIV8SI,
29022 /* AVX512F */
29023 IX86_BUILTIN_SI512_SI256,
29024 IX86_BUILTIN_PD512_PD256,
29025 IX86_BUILTIN_PS512_PS256,
29026 IX86_BUILTIN_SI512_SI,
29027 IX86_BUILTIN_PD512_PD,
29028 IX86_BUILTIN_PS512_PS,
29029 IX86_BUILTIN_ADDPD512,
29030 IX86_BUILTIN_ADDPS512,
29031 IX86_BUILTIN_ADDSD_ROUND,
29032 IX86_BUILTIN_ADDSS_ROUND,
29033 IX86_BUILTIN_ALIGND512,
29034 IX86_BUILTIN_ALIGNQ512,
29035 IX86_BUILTIN_BLENDMD512,
29036 IX86_BUILTIN_BLENDMPD512,
29037 IX86_BUILTIN_BLENDMPS512,
29038 IX86_BUILTIN_BLENDMQ512,
29039 IX86_BUILTIN_BROADCASTF32X4_512,
29040 IX86_BUILTIN_BROADCASTF64X4_512,
29041 IX86_BUILTIN_BROADCASTI32X4_512,
29042 IX86_BUILTIN_BROADCASTI64X4_512,
29043 IX86_BUILTIN_BROADCASTSD512,
29044 IX86_BUILTIN_BROADCASTSS512,
29045 IX86_BUILTIN_CMPD512,
29046 IX86_BUILTIN_CMPPD512,
29047 IX86_BUILTIN_CMPPS512,
29048 IX86_BUILTIN_CMPQ512,
29049 IX86_BUILTIN_CMPSD_MASK,
29050 IX86_BUILTIN_CMPSS_MASK,
29051 IX86_BUILTIN_COMIDF,
29052 IX86_BUILTIN_COMISF,
29053 IX86_BUILTIN_COMPRESSPD512,
29054 IX86_BUILTIN_COMPRESSPDSTORE512,
29055 IX86_BUILTIN_COMPRESSPS512,
29056 IX86_BUILTIN_COMPRESSPSSTORE512,
29057 IX86_BUILTIN_CVTDQ2PD512,
29058 IX86_BUILTIN_CVTDQ2PS512,
29059 IX86_BUILTIN_CVTPD2DQ512,
29060 IX86_BUILTIN_CVTPD2PS512,
29061 IX86_BUILTIN_CVTPD2UDQ512,
29062 IX86_BUILTIN_CVTPH2PS512,
29063 IX86_BUILTIN_CVTPS2DQ512,
29064 IX86_BUILTIN_CVTPS2PD512,
29065 IX86_BUILTIN_CVTPS2PH512,
29066 IX86_BUILTIN_CVTPS2UDQ512,
29067 IX86_BUILTIN_CVTSD2SS_ROUND,
29068 IX86_BUILTIN_CVTSI2SD64,
29069 IX86_BUILTIN_CVTSI2SS32,
29070 IX86_BUILTIN_CVTSI2SS64,
29071 IX86_BUILTIN_CVTSS2SD_ROUND,
29072 IX86_BUILTIN_CVTTPD2DQ512,
29073 IX86_BUILTIN_CVTTPD2UDQ512,
29074 IX86_BUILTIN_CVTTPS2DQ512,
29075 IX86_BUILTIN_CVTTPS2UDQ512,
29076 IX86_BUILTIN_CVTUDQ2PD512,
29077 IX86_BUILTIN_CVTUDQ2PS512,
29078 IX86_BUILTIN_CVTUSI2SD32,
29079 IX86_BUILTIN_CVTUSI2SD64,
29080 IX86_BUILTIN_CVTUSI2SS32,
29081 IX86_BUILTIN_CVTUSI2SS64,
29082 IX86_BUILTIN_DIVPD512,
29083 IX86_BUILTIN_DIVPS512,
29084 IX86_BUILTIN_DIVSD_ROUND,
29085 IX86_BUILTIN_DIVSS_ROUND,
29086 IX86_BUILTIN_EXPANDPD512,
29087 IX86_BUILTIN_EXPANDPD512Z,
29088 IX86_BUILTIN_EXPANDPDLOAD512,
29089 IX86_BUILTIN_EXPANDPDLOAD512Z,
29090 IX86_BUILTIN_EXPANDPS512,
29091 IX86_BUILTIN_EXPANDPS512Z,
29092 IX86_BUILTIN_EXPANDPSLOAD512,
29093 IX86_BUILTIN_EXPANDPSLOAD512Z,
29094 IX86_BUILTIN_EXTRACTF32X4,
29095 IX86_BUILTIN_EXTRACTF64X4,
29096 IX86_BUILTIN_EXTRACTI32X4,
29097 IX86_BUILTIN_EXTRACTI64X4,
29098 IX86_BUILTIN_FIXUPIMMPD512_MASK,
29099 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
29100 IX86_BUILTIN_FIXUPIMMPS512_MASK,
29101 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
29102 IX86_BUILTIN_FIXUPIMMSD128_MASK,
29103 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
29104 IX86_BUILTIN_FIXUPIMMSS128_MASK,
29105 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
29106 IX86_BUILTIN_GETEXPPD512,
29107 IX86_BUILTIN_GETEXPPS512,
29108 IX86_BUILTIN_GETEXPSD128,
29109 IX86_BUILTIN_GETEXPSS128,
29110 IX86_BUILTIN_GETMANTPD512,
29111 IX86_BUILTIN_GETMANTPS512,
29112 IX86_BUILTIN_GETMANTSD128,
29113 IX86_BUILTIN_GETMANTSS128,
29114 IX86_BUILTIN_INSERTF32X4,
29115 IX86_BUILTIN_INSERTF64X4,
29116 IX86_BUILTIN_INSERTI32X4,
29117 IX86_BUILTIN_INSERTI64X4,
29118 IX86_BUILTIN_LOADAPD512,
29119 IX86_BUILTIN_LOADAPS512,
29120 IX86_BUILTIN_LOADDQUDI512,
29121 IX86_BUILTIN_LOADDQUSI512,
29122 IX86_BUILTIN_LOADUPD512,
29123 IX86_BUILTIN_LOADUPS512,
29124 IX86_BUILTIN_MAXPD512,
29125 IX86_BUILTIN_MAXPS512,
29126 IX86_BUILTIN_MAXSD_ROUND,
29127 IX86_BUILTIN_MAXSS_ROUND,
29128 IX86_BUILTIN_MINPD512,
29129 IX86_BUILTIN_MINPS512,
29130 IX86_BUILTIN_MINSD_ROUND,
29131 IX86_BUILTIN_MINSS_ROUND,
29132 IX86_BUILTIN_MOVAPD512,
29133 IX86_BUILTIN_MOVAPS512,
29134 IX86_BUILTIN_MOVDDUP512,
29135 IX86_BUILTIN_MOVDQA32LOAD512,
29136 IX86_BUILTIN_MOVDQA32STORE512,
29137 IX86_BUILTIN_MOVDQA32_512,
29138 IX86_BUILTIN_MOVDQA64LOAD512,
29139 IX86_BUILTIN_MOVDQA64STORE512,
29140 IX86_BUILTIN_MOVDQA64_512,
29141 IX86_BUILTIN_MOVNTDQ512,
29142 IX86_BUILTIN_MOVNTDQA512,
29143 IX86_BUILTIN_MOVNTPD512,
29144 IX86_BUILTIN_MOVNTPS512,
29145 IX86_BUILTIN_MOVSHDUP512,
29146 IX86_BUILTIN_MOVSLDUP512,
29147 IX86_BUILTIN_MULPD512,
29148 IX86_BUILTIN_MULPS512,
29149 IX86_BUILTIN_MULSD_ROUND,
29150 IX86_BUILTIN_MULSS_ROUND,
29151 IX86_BUILTIN_PABSD512,
29152 IX86_BUILTIN_PABSQ512,
29153 IX86_BUILTIN_PADDD512,
29154 IX86_BUILTIN_PADDQ512,
29155 IX86_BUILTIN_PANDD512,
29156 IX86_BUILTIN_PANDND512,
29157 IX86_BUILTIN_PANDNQ512,
29158 IX86_BUILTIN_PANDQ512,
29159 IX86_BUILTIN_PBROADCASTD512,
29160 IX86_BUILTIN_PBROADCASTD512_GPR,
29161 IX86_BUILTIN_PBROADCASTMB512,
29162 IX86_BUILTIN_PBROADCASTMW512,
29163 IX86_BUILTIN_PBROADCASTQ512,
29164 IX86_BUILTIN_PBROADCASTQ512_GPR,
29165 IX86_BUILTIN_PCMPEQD512_MASK,
29166 IX86_BUILTIN_PCMPEQQ512_MASK,
29167 IX86_BUILTIN_PCMPGTD512_MASK,
29168 IX86_BUILTIN_PCMPGTQ512_MASK,
29169 IX86_BUILTIN_PCOMPRESSD512,
29170 IX86_BUILTIN_PCOMPRESSDSTORE512,
29171 IX86_BUILTIN_PCOMPRESSQ512,
29172 IX86_BUILTIN_PCOMPRESSQSTORE512,
29173 IX86_BUILTIN_PEXPANDD512,
29174 IX86_BUILTIN_PEXPANDD512Z,
29175 IX86_BUILTIN_PEXPANDDLOAD512,
29176 IX86_BUILTIN_PEXPANDDLOAD512Z,
29177 IX86_BUILTIN_PEXPANDQ512,
29178 IX86_BUILTIN_PEXPANDQ512Z,
29179 IX86_BUILTIN_PEXPANDQLOAD512,
29180 IX86_BUILTIN_PEXPANDQLOAD512Z,
29181 IX86_BUILTIN_PMAXSD512,
29182 IX86_BUILTIN_PMAXSQ512,
29183 IX86_BUILTIN_PMAXUD512,
29184 IX86_BUILTIN_PMAXUQ512,
29185 IX86_BUILTIN_PMINSD512,
29186 IX86_BUILTIN_PMINSQ512,
29187 IX86_BUILTIN_PMINUD512,
29188 IX86_BUILTIN_PMINUQ512,
29189 IX86_BUILTIN_PMOVDB512,
29190 IX86_BUILTIN_PMOVDB512_MEM,
29191 IX86_BUILTIN_PMOVDW512,
29192 IX86_BUILTIN_PMOVDW512_MEM,
29193 IX86_BUILTIN_PMOVQB512,
29194 IX86_BUILTIN_PMOVQB512_MEM,
29195 IX86_BUILTIN_PMOVQD512,
29196 IX86_BUILTIN_PMOVQD512_MEM,
29197 IX86_BUILTIN_PMOVQW512,
29198 IX86_BUILTIN_PMOVQW512_MEM,
29199 IX86_BUILTIN_PMOVSDB512,
29200 IX86_BUILTIN_PMOVSDB512_MEM,
29201 IX86_BUILTIN_PMOVSDW512,
29202 IX86_BUILTIN_PMOVSDW512_MEM,
29203 IX86_BUILTIN_PMOVSQB512,
29204 IX86_BUILTIN_PMOVSQB512_MEM,
29205 IX86_BUILTIN_PMOVSQD512,
29206 IX86_BUILTIN_PMOVSQD512_MEM,
29207 IX86_BUILTIN_PMOVSQW512,
29208 IX86_BUILTIN_PMOVSQW512_MEM,
29209 IX86_BUILTIN_PMOVSXBD512,
29210 IX86_BUILTIN_PMOVSXBQ512,
29211 IX86_BUILTIN_PMOVSXDQ512,
29212 IX86_BUILTIN_PMOVSXWD512,
29213 IX86_BUILTIN_PMOVSXWQ512,
29214 IX86_BUILTIN_PMOVUSDB512,
29215 IX86_BUILTIN_PMOVUSDB512_MEM,
29216 IX86_BUILTIN_PMOVUSDW512,
29217 IX86_BUILTIN_PMOVUSDW512_MEM,
29218 IX86_BUILTIN_PMOVUSQB512,
29219 IX86_BUILTIN_PMOVUSQB512_MEM,
29220 IX86_BUILTIN_PMOVUSQD512,
29221 IX86_BUILTIN_PMOVUSQD512_MEM,
29222 IX86_BUILTIN_PMOVUSQW512,
29223 IX86_BUILTIN_PMOVUSQW512_MEM,
29224 IX86_BUILTIN_PMOVZXBD512,
29225 IX86_BUILTIN_PMOVZXBQ512,
29226 IX86_BUILTIN_PMOVZXDQ512,
29227 IX86_BUILTIN_PMOVZXWD512,
29228 IX86_BUILTIN_PMOVZXWQ512,
29229 IX86_BUILTIN_PMULDQ512,
29230 IX86_BUILTIN_PMULLD512,
29231 IX86_BUILTIN_PMULUDQ512,
29232 IX86_BUILTIN_PORD512,
29233 IX86_BUILTIN_PORQ512,
29234 IX86_BUILTIN_PROLD512,
29235 IX86_BUILTIN_PROLQ512,
29236 IX86_BUILTIN_PROLVD512,
29237 IX86_BUILTIN_PROLVQ512,
29238 IX86_BUILTIN_PRORD512,
29239 IX86_BUILTIN_PRORQ512,
29240 IX86_BUILTIN_PRORVD512,
29241 IX86_BUILTIN_PRORVQ512,
29242 IX86_BUILTIN_PSHUFD512,
29243 IX86_BUILTIN_PSLLD512,
29244 IX86_BUILTIN_PSLLDI512,
29245 IX86_BUILTIN_PSLLQ512,
29246 IX86_BUILTIN_PSLLQI512,
29247 IX86_BUILTIN_PSLLVV16SI,
29248 IX86_BUILTIN_PSLLVV8DI,
29249 IX86_BUILTIN_PSRAD512,
29250 IX86_BUILTIN_PSRADI512,
29251 IX86_BUILTIN_PSRAQ512,
29252 IX86_BUILTIN_PSRAQI512,
29253 IX86_BUILTIN_PSRAVV16SI,
29254 IX86_BUILTIN_PSRAVV8DI,
29255 IX86_BUILTIN_PSRLD512,
29256 IX86_BUILTIN_PSRLDI512,
29257 IX86_BUILTIN_PSRLQ512,
29258 IX86_BUILTIN_PSRLQI512,
29259 IX86_BUILTIN_PSRLVV16SI,
29260 IX86_BUILTIN_PSRLVV8DI,
29261 IX86_BUILTIN_PSUBD512,
29262 IX86_BUILTIN_PSUBQ512,
29263 IX86_BUILTIN_PTESTMD512,
29264 IX86_BUILTIN_PTESTMQ512,
29265 IX86_BUILTIN_PTESTNMD512,
29266 IX86_BUILTIN_PTESTNMQ512,
29267 IX86_BUILTIN_PUNPCKHDQ512,
29268 IX86_BUILTIN_PUNPCKHQDQ512,
29269 IX86_BUILTIN_PUNPCKLDQ512,
29270 IX86_BUILTIN_PUNPCKLQDQ512,
29271 IX86_BUILTIN_PXORD512,
29272 IX86_BUILTIN_PXORQ512,
29273 IX86_BUILTIN_RCP14PD512,
29274 IX86_BUILTIN_RCP14PS512,
29275 IX86_BUILTIN_RCP14SD,
29276 IX86_BUILTIN_RCP14SS,
29277 IX86_BUILTIN_RNDSCALEPD,
29278 IX86_BUILTIN_RNDSCALEPS,
29279 IX86_BUILTIN_RNDSCALESD,
29280 IX86_BUILTIN_RNDSCALESS,
29281 IX86_BUILTIN_RSQRT14PD512,
29282 IX86_BUILTIN_RSQRT14PS512,
29283 IX86_BUILTIN_RSQRT14SD,
29284 IX86_BUILTIN_RSQRT14SS,
29285 IX86_BUILTIN_SCALEFPD512,
29286 IX86_BUILTIN_SCALEFPS512,
29287 IX86_BUILTIN_SCALEFSD,
29288 IX86_BUILTIN_SCALEFSS,
29289 IX86_BUILTIN_SHUFPD512,
29290 IX86_BUILTIN_SHUFPS512,
29291 IX86_BUILTIN_SHUF_F32x4,
29292 IX86_BUILTIN_SHUF_F64x2,
29293 IX86_BUILTIN_SHUF_I32x4,
29294 IX86_BUILTIN_SHUF_I64x2,
29295 IX86_BUILTIN_SQRTPD512,
29296 IX86_BUILTIN_SQRTPD512_MASK,
29297 IX86_BUILTIN_SQRTPS512_MASK,
29298 IX86_BUILTIN_SQRTPS_NR512,
29299 IX86_BUILTIN_SQRTSD_ROUND,
29300 IX86_BUILTIN_SQRTSS_ROUND,
29301 IX86_BUILTIN_STOREAPD512,
29302 IX86_BUILTIN_STOREAPS512,
29303 IX86_BUILTIN_STOREDQUDI512,
29304 IX86_BUILTIN_STOREDQUSI512,
29305 IX86_BUILTIN_STOREUPD512,
29306 IX86_BUILTIN_STOREUPS512,
29307 IX86_BUILTIN_SUBPD512,
29308 IX86_BUILTIN_SUBPS512,
29309 IX86_BUILTIN_SUBSD_ROUND,
29310 IX86_BUILTIN_SUBSS_ROUND,
29311 IX86_BUILTIN_UCMPD512,
29312 IX86_BUILTIN_UCMPQ512,
29313 IX86_BUILTIN_UNPCKHPD512,
29314 IX86_BUILTIN_UNPCKHPS512,
29315 IX86_BUILTIN_UNPCKLPD512,
29316 IX86_BUILTIN_UNPCKLPS512,
29317 IX86_BUILTIN_VCVTSD2SI32,
29318 IX86_BUILTIN_VCVTSD2SI64,
29319 IX86_BUILTIN_VCVTSD2USI32,
29320 IX86_BUILTIN_VCVTSD2USI64,
29321 IX86_BUILTIN_VCVTSS2SI32,
29322 IX86_BUILTIN_VCVTSS2SI64,
29323 IX86_BUILTIN_VCVTSS2USI32,
29324 IX86_BUILTIN_VCVTSS2USI64,
29325 IX86_BUILTIN_VCVTTSD2SI32,
29326 IX86_BUILTIN_VCVTTSD2SI64,
29327 IX86_BUILTIN_VCVTTSD2USI32,
29328 IX86_BUILTIN_VCVTTSD2USI64,
29329 IX86_BUILTIN_VCVTTSS2SI32,
29330 IX86_BUILTIN_VCVTTSS2SI64,
29331 IX86_BUILTIN_VCVTTSS2USI32,
29332 IX86_BUILTIN_VCVTTSS2USI64,
29333 IX86_BUILTIN_VFMADDPD512_MASK,
29334 IX86_BUILTIN_VFMADDPD512_MASK3,
29335 IX86_BUILTIN_VFMADDPD512_MASKZ,
29336 IX86_BUILTIN_VFMADDPS512_MASK,
29337 IX86_BUILTIN_VFMADDPS512_MASK3,
29338 IX86_BUILTIN_VFMADDPS512_MASKZ,
29339 IX86_BUILTIN_VFMADDSD3_ROUND,
29340 IX86_BUILTIN_VFMADDSS3_ROUND,
29341 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29342 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29343 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29344 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29345 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29346 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29347 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29348 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29349 IX86_BUILTIN_VFMSUBPD512_MASK3,
29350 IX86_BUILTIN_VFMSUBPS512_MASK3,
29351 IX86_BUILTIN_VFMSUBSD3_MASK3,
29352 IX86_BUILTIN_VFMSUBSS3_MASK3,
29353 IX86_BUILTIN_VFNMADDPD512_MASK,
29354 IX86_BUILTIN_VFNMADDPS512_MASK,
29355 IX86_BUILTIN_VFNMSUBPD512_MASK,
29356 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29357 IX86_BUILTIN_VFNMSUBPS512_MASK,
29358 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29359 IX86_BUILTIN_VPCLZCNTD512,
29360 IX86_BUILTIN_VPCLZCNTQ512,
29361 IX86_BUILTIN_VPCONFLICTD512,
29362 IX86_BUILTIN_VPCONFLICTQ512,
29363 IX86_BUILTIN_VPERMDF512,
29364 IX86_BUILTIN_VPERMDI512,
29365 IX86_BUILTIN_VPERMI2VARD512,
29366 IX86_BUILTIN_VPERMI2VARPD512,
29367 IX86_BUILTIN_VPERMI2VARPS512,
29368 IX86_BUILTIN_VPERMI2VARQ512,
29369 IX86_BUILTIN_VPERMILPD512,
29370 IX86_BUILTIN_VPERMILPS512,
29371 IX86_BUILTIN_VPERMILVARPD512,
29372 IX86_BUILTIN_VPERMILVARPS512,
29373 IX86_BUILTIN_VPERMT2VARD512,
29374 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29375 IX86_BUILTIN_VPERMT2VARPD512,
29376 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29377 IX86_BUILTIN_VPERMT2VARPS512,
29378 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29379 IX86_BUILTIN_VPERMT2VARQ512,
29380 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29381 IX86_BUILTIN_VPERMVARDF512,
29382 IX86_BUILTIN_VPERMVARDI512,
29383 IX86_BUILTIN_VPERMVARSF512,
29384 IX86_BUILTIN_VPERMVARSI512,
29385 IX86_BUILTIN_VTERNLOGD512_MASK,
29386 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29387 IX86_BUILTIN_VTERNLOGQ512_MASK,
29388 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29390 /* Mask arithmetic operations */
29391 IX86_BUILTIN_KAND16,
29392 IX86_BUILTIN_KANDN16,
29393 IX86_BUILTIN_KNOT16,
29394 IX86_BUILTIN_KOR16,
29395 IX86_BUILTIN_KORTESTC16,
29396 IX86_BUILTIN_KORTESTZ16,
29397 IX86_BUILTIN_KUNPCKBW,
29398 IX86_BUILTIN_KXNOR16,
29399 IX86_BUILTIN_KXOR16,
29400 IX86_BUILTIN_KMOV16,
29402 /* AVX512VL. */
29403 IX86_BUILTIN_PMOVUSQD256_MEM,
29404 IX86_BUILTIN_PMOVUSQD128_MEM,
29405 IX86_BUILTIN_PMOVSQD256_MEM,
29406 IX86_BUILTIN_PMOVSQD128_MEM,
29407 IX86_BUILTIN_PMOVQD256_MEM,
29408 IX86_BUILTIN_PMOVQD128_MEM,
29409 IX86_BUILTIN_PMOVUSQW256_MEM,
29410 IX86_BUILTIN_PMOVUSQW128_MEM,
29411 IX86_BUILTIN_PMOVSQW256_MEM,
29412 IX86_BUILTIN_PMOVSQW128_MEM,
29413 IX86_BUILTIN_PMOVQW256_MEM,
29414 IX86_BUILTIN_PMOVQW128_MEM,
29415 IX86_BUILTIN_PMOVUSQB256_MEM,
29416 IX86_BUILTIN_PMOVUSQB128_MEM,
29417 IX86_BUILTIN_PMOVSQB256_MEM,
29418 IX86_BUILTIN_PMOVSQB128_MEM,
29419 IX86_BUILTIN_PMOVQB256_MEM,
29420 IX86_BUILTIN_PMOVQB128_MEM,
29421 IX86_BUILTIN_PMOVUSDW256_MEM,
29422 IX86_BUILTIN_PMOVUSDW128_MEM,
29423 IX86_BUILTIN_PMOVSDW256_MEM,
29424 IX86_BUILTIN_PMOVSDW128_MEM,
29425 IX86_BUILTIN_PMOVDW256_MEM,
29426 IX86_BUILTIN_PMOVDW128_MEM,
29427 IX86_BUILTIN_PMOVUSDB256_MEM,
29428 IX86_BUILTIN_PMOVUSDB128_MEM,
29429 IX86_BUILTIN_PMOVSDB256_MEM,
29430 IX86_BUILTIN_PMOVSDB128_MEM,
29431 IX86_BUILTIN_PMOVDB256_MEM,
29432 IX86_BUILTIN_PMOVDB128_MEM,
29433 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29434 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29435 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29436 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29437 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29438 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29439 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29440 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29441 IX86_BUILTIN_LOADAPD256_MASK,
29442 IX86_BUILTIN_LOADAPD128_MASK,
29443 IX86_BUILTIN_LOADAPS256_MASK,
29444 IX86_BUILTIN_LOADAPS128_MASK,
29445 IX86_BUILTIN_STOREAPD256_MASK,
29446 IX86_BUILTIN_STOREAPD128_MASK,
29447 IX86_BUILTIN_STOREAPS256_MASK,
29448 IX86_BUILTIN_STOREAPS128_MASK,
29449 IX86_BUILTIN_LOADUPD256_MASK,
29450 IX86_BUILTIN_LOADUPD128_MASK,
29451 IX86_BUILTIN_LOADUPS256_MASK,
29452 IX86_BUILTIN_LOADUPS128_MASK,
29453 IX86_BUILTIN_STOREUPD256_MASK,
29454 IX86_BUILTIN_STOREUPD128_MASK,
29455 IX86_BUILTIN_STOREUPS256_MASK,
29456 IX86_BUILTIN_STOREUPS128_MASK,
29457 IX86_BUILTIN_LOADDQUDI256_MASK,
29458 IX86_BUILTIN_LOADDQUDI128_MASK,
29459 IX86_BUILTIN_LOADDQUSI256_MASK,
29460 IX86_BUILTIN_LOADDQUSI128_MASK,
29461 IX86_BUILTIN_LOADDQUHI256_MASK,
29462 IX86_BUILTIN_LOADDQUHI128_MASK,
29463 IX86_BUILTIN_LOADDQUQI256_MASK,
29464 IX86_BUILTIN_LOADDQUQI128_MASK,
29465 IX86_BUILTIN_STOREDQUDI256_MASK,
29466 IX86_BUILTIN_STOREDQUDI128_MASK,
29467 IX86_BUILTIN_STOREDQUSI256_MASK,
29468 IX86_BUILTIN_STOREDQUSI128_MASK,
29469 IX86_BUILTIN_STOREDQUHI256_MASK,
29470 IX86_BUILTIN_STOREDQUHI128_MASK,
29471 IX86_BUILTIN_STOREDQUQI256_MASK,
29472 IX86_BUILTIN_STOREDQUQI128_MASK,
29473 IX86_BUILTIN_COMPRESSPDSTORE256,
29474 IX86_BUILTIN_COMPRESSPDSTORE128,
29475 IX86_BUILTIN_COMPRESSPSSTORE256,
29476 IX86_BUILTIN_COMPRESSPSSTORE128,
29477 IX86_BUILTIN_PCOMPRESSQSTORE256,
29478 IX86_BUILTIN_PCOMPRESSQSTORE128,
29479 IX86_BUILTIN_PCOMPRESSDSTORE256,
29480 IX86_BUILTIN_PCOMPRESSDSTORE128,
29481 IX86_BUILTIN_EXPANDPDLOAD256,
29482 IX86_BUILTIN_EXPANDPDLOAD128,
29483 IX86_BUILTIN_EXPANDPSLOAD256,
29484 IX86_BUILTIN_EXPANDPSLOAD128,
29485 IX86_BUILTIN_PEXPANDQLOAD256,
29486 IX86_BUILTIN_PEXPANDQLOAD128,
29487 IX86_BUILTIN_PEXPANDDLOAD256,
29488 IX86_BUILTIN_PEXPANDDLOAD128,
29489 IX86_BUILTIN_EXPANDPDLOAD256Z,
29490 IX86_BUILTIN_EXPANDPDLOAD128Z,
29491 IX86_BUILTIN_EXPANDPSLOAD256Z,
29492 IX86_BUILTIN_EXPANDPSLOAD128Z,
29493 IX86_BUILTIN_PEXPANDQLOAD256Z,
29494 IX86_BUILTIN_PEXPANDQLOAD128Z,
29495 IX86_BUILTIN_PEXPANDDLOAD256Z,
29496 IX86_BUILTIN_PEXPANDDLOAD128Z,
29497 IX86_BUILTIN_PALIGNR256_MASK,
29498 IX86_BUILTIN_PALIGNR128_MASK,
29499 IX86_BUILTIN_MOVDQA64_256_MASK,
29500 IX86_BUILTIN_MOVDQA64_128_MASK,
29501 IX86_BUILTIN_MOVDQA32_256_MASK,
29502 IX86_BUILTIN_MOVDQA32_128_MASK,
29503 IX86_BUILTIN_MOVAPD256_MASK,
29504 IX86_BUILTIN_MOVAPD128_MASK,
29505 IX86_BUILTIN_MOVAPS256_MASK,
29506 IX86_BUILTIN_MOVAPS128_MASK,
29507 IX86_BUILTIN_MOVDQUHI256_MASK,
29508 IX86_BUILTIN_MOVDQUHI128_MASK,
29509 IX86_BUILTIN_MOVDQUQI256_MASK,
29510 IX86_BUILTIN_MOVDQUQI128_MASK,
29511 IX86_BUILTIN_MINPS128_MASK,
29512 IX86_BUILTIN_MAXPS128_MASK,
29513 IX86_BUILTIN_MINPD128_MASK,
29514 IX86_BUILTIN_MAXPD128_MASK,
29515 IX86_BUILTIN_MAXPD256_MASK,
29516 IX86_BUILTIN_MAXPS256_MASK,
29517 IX86_BUILTIN_MINPD256_MASK,
29518 IX86_BUILTIN_MINPS256_MASK,
29519 IX86_BUILTIN_MULPS128_MASK,
29520 IX86_BUILTIN_DIVPS128_MASK,
29521 IX86_BUILTIN_MULPD128_MASK,
29522 IX86_BUILTIN_DIVPD128_MASK,
29523 IX86_BUILTIN_DIVPD256_MASK,
29524 IX86_BUILTIN_DIVPS256_MASK,
29525 IX86_BUILTIN_MULPD256_MASK,
29526 IX86_BUILTIN_MULPS256_MASK,
29527 IX86_BUILTIN_ADDPD128_MASK,
29528 IX86_BUILTIN_ADDPD256_MASK,
29529 IX86_BUILTIN_ADDPS128_MASK,
29530 IX86_BUILTIN_ADDPS256_MASK,
29531 IX86_BUILTIN_SUBPD128_MASK,
29532 IX86_BUILTIN_SUBPD256_MASK,
29533 IX86_BUILTIN_SUBPS128_MASK,
29534 IX86_BUILTIN_SUBPS256_MASK,
29535 IX86_BUILTIN_XORPD256_MASK,
29536 IX86_BUILTIN_XORPD128_MASK,
29537 IX86_BUILTIN_XORPS256_MASK,
29538 IX86_BUILTIN_XORPS128_MASK,
29539 IX86_BUILTIN_ORPD256_MASK,
29540 IX86_BUILTIN_ORPD128_MASK,
29541 IX86_BUILTIN_ORPS256_MASK,
29542 IX86_BUILTIN_ORPS128_MASK,
29543 IX86_BUILTIN_BROADCASTF32x2_256,
29544 IX86_BUILTIN_BROADCASTI32x2_256,
29545 IX86_BUILTIN_BROADCASTI32x2_128,
29546 IX86_BUILTIN_BROADCASTF64X2_256,
29547 IX86_BUILTIN_BROADCASTI64X2_256,
29548 IX86_BUILTIN_BROADCASTF32X4_256,
29549 IX86_BUILTIN_BROADCASTI32X4_256,
29550 IX86_BUILTIN_EXTRACTF32X4_256,
29551 IX86_BUILTIN_EXTRACTI32X4_256,
29552 IX86_BUILTIN_DBPSADBW256,
29553 IX86_BUILTIN_DBPSADBW128,
29554 IX86_BUILTIN_CVTTPD2QQ256,
29555 IX86_BUILTIN_CVTTPD2QQ128,
29556 IX86_BUILTIN_CVTTPD2UQQ256,
29557 IX86_BUILTIN_CVTTPD2UQQ128,
29558 IX86_BUILTIN_CVTPD2QQ256,
29559 IX86_BUILTIN_CVTPD2QQ128,
29560 IX86_BUILTIN_CVTPD2UQQ256,
29561 IX86_BUILTIN_CVTPD2UQQ128,
29562 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29563 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29564 IX86_BUILTIN_CVTTPS2QQ256,
29565 IX86_BUILTIN_CVTTPS2QQ128,
29566 IX86_BUILTIN_CVTTPS2UQQ256,
29567 IX86_BUILTIN_CVTTPS2UQQ128,
29568 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29569 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29570 IX86_BUILTIN_CVTTPS2UDQ256,
29571 IX86_BUILTIN_CVTTPS2UDQ128,
29572 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29573 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29574 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29575 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29576 IX86_BUILTIN_CVTPD2DQ256_MASK,
29577 IX86_BUILTIN_CVTPD2DQ128_MASK,
29578 IX86_BUILTIN_CVTDQ2PD256_MASK,
29579 IX86_BUILTIN_CVTDQ2PD128_MASK,
29580 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29581 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29582 IX86_BUILTIN_CVTDQ2PS256_MASK,
29583 IX86_BUILTIN_CVTDQ2PS128_MASK,
29584 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29585 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29586 IX86_BUILTIN_CVTPS2PD256_MASK,
29587 IX86_BUILTIN_CVTPS2PD128_MASK,
29588 IX86_BUILTIN_PBROADCASTB256_MASK,
29589 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29590 IX86_BUILTIN_PBROADCASTB128_MASK,
29591 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29592 IX86_BUILTIN_PBROADCASTW256_MASK,
29593 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29594 IX86_BUILTIN_PBROADCASTW128_MASK,
29595 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29596 IX86_BUILTIN_PBROADCASTD256_MASK,
29597 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29598 IX86_BUILTIN_PBROADCASTD128_MASK,
29599 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29600 IX86_BUILTIN_PBROADCASTQ256_MASK,
29601 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29602 IX86_BUILTIN_PBROADCASTQ128_MASK,
29603 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29604 IX86_BUILTIN_BROADCASTSS256,
29605 IX86_BUILTIN_BROADCASTSS128,
29606 IX86_BUILTIN_BROADCASTSD256,
29607 IX86_BUILTIN_EXTRACTF64X2_256,
29608 IX86_BUILTIN_EXTRACTI64X2_256,
29609 IX86_BUILTIN_INSERTF32X4_256,
29610 IX86_BUILTIN_INSERTI32X4_256,
29611 IX86_BUILTIN_PMOVSXBW256_MASK,
29612 IX86_BUILTIN_PMOVSXBW128_MASK,
29613 IX86_BUILTIN_PMOVSXBD256_MASK,
29614 IX86_BUILTIN_PMOVSXBD128_MASK,
29615 IX86_BUILTIN_PMOVSXBQ256_MASK,
29616 IX86_BUILTIN_PMOVSXBQ128_MASK,
29617 IX86_BUILTIN_PMOVSXWD256_MASK,
29618 IX86_BUILTIN_PMOVSXWD128_MASK,
29619 IX86_BUILTIN_PMOVSXWQ256_MASK,
29620 IX86_BUILTIN_PMOVSXWQ128_MASK,
29621 IX86_BUILTIN_PMOVSXDQ256_MASK,
29622 IX86_BUILTIN_PMOVSXDQ128_MASK,
29623 IX86_BUILTIN_PMOVZXBW256_MASK,
29624 IX86_BUILTIN_PMOVZXBW128_MASK,
29625 IX86_BUILTIN_PMOVZXBD256_MASK,
29626 IX86_BUILTIN_PMOVZXBD128_MASK,
29627 IX86_BUILTIN_PMOVZXBQ256_MASK,
29628 IX86_BUILTIN_PMOVZXBQ128_MASK,
29629 IX86_BUILTIN_PMOVZXWD256_MASK,
29630 IX86_BUILTIN_PMOVZXWD128_MASK,
29631 IX86_BUILTIN_PMOVZXWQ256_MASK,
29632 IX86_BUILTIN_PMOVZXWQ128_MASK,
29633 IX86_BUILTIN_PMOVZXDQ256_MASK,
29634 IX86_BUILTIN_PMOVZXDQ128_MASK,
29635 IX86_BUILTIN_REDUCEPD256_MASK,
29636 IX86_BUILTIN_REDUCEPD128_MASK,
29637 IX86_BUILTIN_REDUCEPS256_MASK,
29638 IX86_BUILTIN_REDUCEPS128_MASK,
29639 IX86_BUILTIN_REDUCESD_MASK,
29640 IX86_BUILTIN_REDUCESS_MASK,
29641 IX86_BUILTIN_VPERMVARHI256_MASK,
29642 IX86_BUILTIN_VPERMVARHI128_MASK,
29643 IX86_BUILTIN_VPERMT2VARHI256,
29644 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29645 IX86_BUILTIN_VPERMT2VARHI128,
29646 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29647 IX86_BUILTIN_VPERMI2VARHI256,
29648 IX86_BUILTIN_VPERMI2VARHI128,
29649 IX86_BUILTIN_RCP14PD256,
29650 IX86_BUILTIN_RCP14PD128,
29651 IX86_BUILTIN_RCP14PS256,
29652 IX86_BUILTIN_RCP14PS128,
29653 IX86_BUILTIN_RSQRT14PD256_MASK,
29654 IX86_BUILTIN_RSQRT14PD128_MASK,
29655 IX86_BUILTIN_RSQRT14PS256_MASK,
29656 IX86_BUILTIN_RSQRT14PS128_MASK,
29657 IX86_BUILTIN_SQRTPD256_MASK,
29658 IX86_BUILTIN_SQRTPD128_MASK,
29659 IX86_BUILTIN_SQRTPS256_MASK,
29660 IX86_BUILTIN_SQRTPS128_MASK,
29661 IX86_BUILTIN_PADDB128_MASK,
29662 IX86_BUILTIN_PADDW128_MASK,
29663 IX86_BUILTIN_PADDD128_MASK,
29664 IX86_BUILTIN_PADDQ128_MASK,
29665 IX86_BUILTIN_PSUBB128_MASK,
29666 IX86_BUILTIN_PSUBW128_MASK,
29667 IX86_BUILTIN_PSUBD128_MASK,
29668 IX86_BUILTIN_PSUBQ128_MASK,
29669 IX86_BUILTIN_PADDSB128_MASK,
29670 IX86_BUILTIN_PADDSW128_MASK,
29671 IX86_BUILTIN_PSUBSB128_MASK,
29672 IX86_BUILTIN_PSUBSW128_MASK,
29673 IX86_BUILTIN_PADDUSB128_MASK,
29674 IX86_BUILTIN_PADDUSW128_MASK,
29675 IX86_BUILTIN_PSUBUSB128_MASK,
29676 IX86_BUILTIN_PSUBUSW128_MASK,
29677 IX86_BUILTIN_PADDB256_MASK,
29678 IX86_BUILTIN_PADDW256_MASK,
29679 IX86_BUILTIN_PADDD256_MASK,
29680 IX86_BUILTIN_PADDQ256_MASK,
29681 IX86_BUILTIN_PADDSB256_MASK,
29682 IX86_BUILTIN_PADDSW256_MASK,
29683 IX86_BUILTIN_PADDUSB256_MASK,
29684 IX86_BUILTIN_PADDUSW256_MASK,
29685 IX86_BUILTIN_PSUBB256_MASK,
29686 IX86_BUILTIN_PSUBW256_MASK,
29687 IX86_BUILTIN_PSUBD256_MASK,
29688 IX86_BUILTIN_PSUBQ256_MASK,
29689 IX86_BUILTIN_PSUBSB256_MASK,
29690 IX86_BUILTIN_PSUBSW256_MASK,
29691 IX86_BUILTIN_PSUBUSB256_MASK,
29692 IX86_BUILTIN_PSUBUSW256_MASK,
29693 IX86_BUILTIN_SHUF_F64x2_256,
29694 IX86_BUILTIN_SHUF_I64x2_256,
29695 IX86_BUILTIN_SHUF_I32x4_256,
29696 IX86_BUILTIN_SHUF_F32x4_256,
29697 IX86_BUILTIN_PMOVWB128,
29698 IX86_BUILTIN_PMOVWB256,
29699 IX86_BUILTIN_PMOVSWB128,
29700 IX86_BUILTIN_PMOVSWB256,
29701 IX86_BUILTIN_PMOVUSWB128,
29702 IX86_BUILTIN_PMOVUSWB256,
29703 IX86_BUILTIN_PMOVDB128,
29704 IX86_BUILTIN_PMOVDB256,
29705 IX86_BUILTIN_PMOVSDB128,
29706 IX86_BUILTIN_PMOVSDB256,
29707 IX86_BUILTIN_PMOVUSDB128,
29708 IX86_BUILTIN_PMOVUSDB256,
29709 IX86_BUILTIN_PMOVDW128,
29710 IX86_BUILTIN_PMOVDW256,
29711 IX86_BUILTIN_PMOVSDW128,
29712 IX86_BUILTIN_PMOVSDW256,
29713 IX86_BUILTIN_PMOVUSDW128,
29714 IX86_BUILTIN_PMOVUSDW256,
29715 IX86_BUILTIN_PMOVQB128,
29716 IX86_BUILTIN_PMOVQB256,
29717 IX86_BUILTIN_PMOVSQB128,
29718 IX86_BUILTIN_PMOVSQB256,
29719 IX86_BUILTIN_PMOVUSQB128,
29720 IX86_BUILTIN_PMOVUSQB256,
29721 IX86_BUILTIN_PMOVQW128,
29722 IX86_BUILTIN_PMOVQW256,
29723 IX86_BUILTIN_PMOVSQW128,
29724 IX86_BUILTIN_PMOVSQW256,
29725 IX86_BUILTIN_PMOVUSQW128,
29726 IX86_BUILTIN_PMOVUSQW256,
29727 IX86_BUILTIN_PMOVQD128,
29728 IX86_BUILTIN_PMOVQD256,
29729 IX86_BUILTIN_PMOVSQD128,
29730 IX86_BUILTIN_PMOVSQD256,
29731 IX86_BUILTIN_PMOVUSQD128,
29732 IX86_BUILTIN_PMOVUSQD256,
29733 IX86_BUILTIN_RANGEPD256,
29734 IX86_BUILTIN_RANGEPD128,
29735 IX86_BUILTIN_RANGEPS256,
29736 IX86_BUILTIN_RANGEPS128,
29737 IX86_BUILTIN_GETEXPPS256,
29738 IX86_BUILTIN_GETEXPPD256,
29739 IX86_BUILTIN_GETEXPPS128,
29740 IX86_BUILTIN_GETEXPPD128,
29741 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29742 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29743 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29744 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29745 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29746 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29747 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29748 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29749 IX86_BUILTIN_PABSQ256,
29750 IX86_BUILTIN_PABSQ128,
29751 IX86_BUILTIN_PABSD256_MASK,
29752 IX86_BUILTIN_PABSD128_MASK,
29753 IX86_BUILTIN_PMULHRSW256_MASK,
29754 IX86_BUILTIN_PMULHRSW128_MASK,
29755 IX86_BUILTIN_PMULHUW128_MASK,
29756 IX86_BUILTIN_PMULHUW256_MASK,
29757 IX86_BUILTIN_PMULHW256_MASK,
29758 IX86_BUILTIN_PMULHW128_MASK,
29759 IX86_BUILTIN_PMULLW256_MASK,
29760 IX86_BUILTIN_PMULLW128_MASK,
29761 IX86_BUILTIN_PMULLQ256,
29762 IX86_BUILTIN_PMULLQ128,
29763 IX86_BUILTIN_ANDPD256_MASK,
29764 IX86_BUILTIN_ANDPD128_MASK,
29765 IX86_BUILTIN_ANDPS256_MASK,
29766 IX86_BUILTIN_ANDPS128_MASK,
29767 IX86_BUILTIN_ANDNPD256_MASK,
29768 IX86_BUILTIN_ANDNPD128_MASK,
29769 IX86_BUILTIN_ANDNPS256_MASK,
29770 IX86_BUILTIN_ANDNPS128_MASK,
29771 IX86_BUILTIN_PSLLWI128_MASK,
29772 IX86_BUILTIN_PSLLDI128_MASK,
29773 IX86_BUILTIN_PSLLQI128_MASK,
29774 IX86_BUILTIN_PSLLW128_MASK,
29775 IX86_BUILTIN_PSLLD128_MASK,
29776 IX86_BUILTIN_PSLLQ128_MASK,
29777 IX86_BUILTIN_PSLLWI256_MASK ,
29778 IX86_BUILTIN_PSLLW256_MASK,
29779 IX86_BUILTIN_PSLLDI256_MASK,
29780 IX86_BUILTIN_PSLLD256_MASK,
29781 IX86_BUILTIN_PSLLQI256_MASK,
29782 IX86_BUILTIN_PSLLQ256_MASK,
29783 IX86_BUILTIN_PSRADI128_MASK,
29784 IX86_BUILTIN_PSRAD128_MASK,
29785 IX86_BUILTIN_PSRADI256_MASK,
29786 IX86_BUILTIN_PSRAD256_MASK,
29787 IX86_BUILTIN_PSRAQI128_MASK,
29788 IX86_BUILTIN_PSRAQ128_MASK,
29789 IX86_BUILTIN_PSRAQI256_MASK,
29790 IX86_BUILTIN_PSRAQ256_MASK,
29791 IX86_BUILTIN_PANDD256,
29792 IX86_BUILTIN_PANDD128,
29793 IX86_BUILTIN_PSRLDI128_MASK,
29794 IX86_BUILTIN_PSRLD128_MASK,
29795 IX86_BUILTIN_PSRLDI256_MASK,
29796 IX86_BUILTIN_PSRLD256_MASK,
29797 IX86_BUILTIN_PSRLQI128_MASK,
29798 IX86_BUILTIN_PSRLQ128_MASK,
29799 IX86_BUILTIN_PSRLQI256_MASK,
29800 IX86_BUILTIN_PSRLQ256_MASK,
29801 IX86_BUILTIN_PANDQ256,
29802 IX86_BUILTIN_PANDQ128,
29803 IX86_BUILTIN_PANDND256,
29804 IX86_BUILTIN_PANDND128,
29805 IX86_BUILTIN_PANDNQ256,
29806 IX86_BUILTIN_PANDNQ128,
29807 IX86_BUILTIN_PORD256,
29808 IX86_BUILTIN_PORD128,
29809 IX86_BUILTIN_PORQ256,
29810 IX86_BUILTIN_PORQ128,
29811 IX86_BUILTIN_PXORD256,
29812 IX86_BUILTIN_PXORD128,
29813 IX86_BUILTIN_PXORQ256,
29814 IX86_BUILTIN_PXORQ128,
29815 IX86_BUILTIN_PACKSSWB256_MASK,
29816 IX86_BUILTIN_PACKSSWB128_MASK,
29817 IX86_BUILTIN_PACKUSWB256_MASK,
29818 IX86_BUILTIN_PACKUSWB128_MASK,
29819 IX86_BUILTIN_RNDSCALEPS256,
29820 IX86_BUILTIN_RNDSCALEPD256,
29821 IX86_BUILTIN_RNDSCALEPS128,
29822 IX86_BUILTIN_RNDSCALEPD128,
29823 IX86_BUILTIN_VTERNLOGQ256_MASK,
29824 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29825 IX86_BUILTIN_VTERNLOGD256_MASK,
29826 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29827 IX86_BUILTIN_VTERNLOGQ128_MASK,
29828 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29829 IX86_BUILTIN_VTERNLOGD128_MASK,
29830 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29831 IX86_BUILTIN_SCALEFPD256,
29832 IX86_BUILTIN_SCALEFPS256,
29833 IX86_BUILTIN_SCALEFPD128,
29834 IX86_BUILTIN_SCALEFPS128,
29835 IX86_BUILTIN_VFMADDPD256_MASK,
29836 IX86_BUILTIN_VFMADDPD256_MASK3,
29837 IX86_BUILTIN_VFMADDPD256_MASKZ,
29838 IX86_BUILTIN_VFMADDPD128_MASK,
29839 IX86_BUILTIN_VFMADDPD128_MASK3,
29840 IX86_BUILTIN_VFMADDPD128_MASKZ,
29841 IX86_BUILTIN_VFMADDPS256_MASK,
29842 IX86_BUILTIN_VFMADDPS256_MASK3,
29843 IX86_BUILTIN_VFMADDPS256_MASKZ,
29844 IX86_BUILTIN_VFMADDPS128_MASK,
29845 IX86_BUILTIN_VFMADDPS128_MASK3,
29846 IX86_BUILTIN_VFMADDPS128_MASKZ,
29847 IX86_BUILTIN_VFMSUBPD256_MASK3,
29848 IX86_BUILTIN_VFMSUBPD128_MASK3,
29849 IX86_BUILTIN_VFMSUBPS256_MASK3,
29850 IX86_BUILTIN_VFMSUBPS128_MASK3,
29851 IX86_BUILTIN_VFNMADDPD256_MASK,
29852 IX86_BUILTIN_VFNMADDPD128_MASK,
29853 IX86_BUILTIN_VFNMADDPS256_MASK,
29854 IX86_BUILTIN_VFNMADDPS128_MASK,
29855 IX86_BUILTIN_VFNMSUBPD256_MASK,
29856 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29857 IX86_BUILTIN_VFNMSUBPD128_MASK,
29858 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29859 IX86_BUILTIN_VFNMSUBPS256_MASK,
29860 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29861 IX86_BUILTIN_VFNMSUBPS128_MASK,
29862 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29863 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29864 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29865 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29866 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29867 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29868 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29869 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29870 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29871 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29872 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29873 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29874 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29875 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29876 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29877 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29878 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29879 IX86_BUILTIN_INSERTF64X2_256,
29880 IX86_BUILTIN_INSERTI64X2_256,
29881 IX86_BUILTIN_PSRAVV16HI,
29882 IX86_BUILTIN_PSRAVV8HI,
29883 IX86_BUILTIN_PMADDUBSW256_MASK,
29884 IX86_BUILTIN_PMADDUBSW128_MASK,
29885 IX86_BUILTIN_PMADDWD256_MASK,
29886 IX86_BUILTIN_PMADDWD128_MASK,
29887 IX86_BUILTIN_PSRLVV16HI,
29888 IX86_BUILTIN_PSRLVV8HI,
29889 IX86_BUILTIN_CVTPS2DQ256_MASK,
29890 IX86_BUILTIN_CVTPS2DQ128_MASK,
29891 IX86_BUILTIN_CVTPS2UDQ256,
29892 IX86_BUILTIN_CVTPS2UDQ128,
29893 IX86_BUILTIN_CVTPS2QQ256,
29894 IX86_BUILTIN_CVTPS2QQ128,
29895 IX86_BUILTIN_CVTPS2UQQ256,
29896 IX86_BUILTIN_CVTPS2UQQ128,
29897 IX86_BUILTIN_GETMANTPS256,
29898 IX86_BUILTIN_GETMANTPS128,
29899 IX86_BUILTIN_GETMANTPD256,
29900 IX86_BUILTIN_GETMANTPD128,
29901 IX86_BUILTIN_MOVDDUP256_MASK,
29902 IX86_BUILTIN_MOVDDUP128_MASK,
29903 IX86_BUILTIN_MOVSHDUP256_MASK,
29904 IX86_BUILTIN_MOVSHDUP128_MASK,
29905 IX86_BUILTIN_MOVSLDUP256_MASK,
29906 IX86_BUILTIN_MOVSLDUP128_MASK,
29907 IX86_BUILTIN_CVTQQ2PS256,
29908 IX86_BUILTIN_CVTQQ2PS128,
29909 IX86_BUILTIN_CVTUQQ2PS256,
29910 IX86_BUILTIN_CVTUQQ2PS128,
29911 IX86_BUILTIN_CVTQQ2PD256,
29912 IX86_BUILTIN_CVTQQ2PD128,
29913 IX86_BUILTIN_CVTUQQ2PD256,
29914 IX86_BUILTIN_CVTUQQ2PD128,
29915 IX86_BUILTIN_VPERMT2VARQ256,
29916 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29917 IX86_BUILTIN_VPERMT2VARD256,
29918 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29919 IX86_BUILTIN_VPERMI2VARQ256,
29920 IX86_BUILTIN_VPERMI2VARD256,
29921 IX86_BUILTIN_VPERMT2VARPD256,
29922 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29923 IX86_BUILTIN_VPERMT2VARPS256,
29924 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29925 IX86_BUILTIN_VPERMI2VARPD256,
29926 IX86_BUILTIN_VPERMI2VARPS256,
29927 IX86_BUILTIN_VPERMT2VARQ128,
29928 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29929 IX86_BUILTIN_VPERMT2VARD128,
29930 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29931 IX86_BUILTIN_VPERMI2VARQ128,
29932 IX86_BUILTIN_VPERMI2VARD128,
29933 IX86_BUILTIN_VPERMT2VARPD128,
29934 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29935 IX86_BUILTIN_VPERMT2VARPS128,
29936 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29937 IX86_BUILTIN_VPERMI2VARPD128,
29938 IX86_BUILTIN_VPERMI2VARPS128,
29939 IX86_BUILTIN_PSHUFB256_MASK,
29940 IX86_BUILTIN_PSHUFB128_MASK,
29941 IX86_BUILTIN_PSHUFHW256_MASK,
29942 IX86_BUILTIN_PSHUFHW128_MASK,
29943 IX86_BUILTIN_PSHUFLW256_MASK,
29944 IX86_BUILTIN_PSHUFLW128_MASK,
29945 IX86_BUILTIN_PSHUFD256_MASK,
29946 IX86_BUILTIN_PSHUFD128_MASK,
29947 IX86_BUILTIN_SHUFPD256_MASK,
29948 IX86_BUILTIN_SHUFPD128_MASK,
29949 IX86_BUILTIN_SHUFPS256_MASK,
29950 IX86_BUILTIN_SHUFPS128_MASK,
29951 IX86_BUILTIN_PROLVQ256,
29952 IX86_BUILTIN_PROLVQ128,
29953 IX86_BUILTIN_PROLQ256,
29954 IX86_BUILTIN_PROLQ128,
29955 IX86_BUILTIN_PRORVQ256,
29956 IX86_BUILTIN_PRORVQ128,
29957 IX86_BUILTIN_PRORQ256,
29958 IX86_BUILTIN_PRORQ128,
29959 IX86_BUILTIN_PSRAVQ128,
29960 IX86_BUILTIN_PSRAVQ256,
29961 IX86_BUILTIN_PSLLVV4DI_MASK,
29962 IX86_BUILTIN_PSLLVV2DI_MASK,
29963 IX86_BUILTIN_PSLLVV8SI_MASK,
29964 IX86_BUILTIN_PSLLVV4SI_MASK,
29965 IX86_BUILTIN_PSRAVV8SI_MASK,
29966 IX86_BUILTIN_PSRAVV4SI_MASK,
29967 IX86_BUILTIN_PSRLVV4DI_MASK,
29968 IX86_BUILTIN_PSRLVV2DI_MASK,
29969 IX86_BUILTIN_PSRLVV8SI_MASK,
29970 IX86_BUILTIN_PSRLVV4SI_MASK,
29971 IX86_BUILTIN_PSRAWI256_MASK,
29972 IX86_BUILTIN_PSRAW256_MASK,
29973 IX86_BUILTIN_PSRAWI128_MASK,
29974 IX86_BUILTIN_PSRAW128_MASK,
29975 IX86_BUILTIN_PSRLWI256_MASK,
29976 IX86_BUILTIN_PSRLW256_MASK,
29977 IX86_BUILTIN_PSRLWI128_MASK,
29978 IX86_BUILTIN_PSRLW128_MASK,
29979 IX86_BUILTIN_PRORVD256,
29980 IX86_BUILTIN_PROLVD256,
29981 IX86_BUILTIN_PRORD256,
29982 IX86_BUILTIN_PROLD256,
29983 IX86_BUILTIN_PRORVD128,
29984 IX86_BUILTIN_PROLVD128,
29985 IX86_BUILTIN_PRORD128,
29986 IX86_BUILTIN_PROLD128,
29987 IX86_BUILTIN_FPCLASSPD256,
29988 IX86_BUILTIN_FPCLASSPD128,
29989 IX86_BUILTIN_FPCLASSSD,
29990 IX86_BUILTIN_FPCLASSPS256,
29991 IX86_BUILTIN_FPCLASSPS128,
29992 IX86_BUILTIN_FPCLASSSS,
29993 IX86_BUILTIN_CVTB2MASK128,
29994 IX86_BUILTIN_CVTB2MASK256,
29995 IX86_BUILTIN_CVTW2MASK128,
29996 IX86_BUILTIN_CVTW2MASK256,
29997 IX86_BUILTIN_CVTD2MASK128,
29998 IX86_BUILTIN_CVTD2MASK256,
29999 IX86_BUILTIN_CVTQ2MASK128,
30000 IX86_BUILTIN_CVTQ2MASK256,
30001 IX86_BUILTIN_CVTMASK2B128,
30002 IX86_BUILTIN_CVTMASK2B256,
30003 IX86_BUILTIN_CVTMASK2W128,
30004 IX86_BUILTIN_CVTMASK2W256,
30005 IX86_BUILTIN_CVTMASK2D128,
30006 IX86_BUILTIN_CVTMASK2D256,
30007 IX86_BUILTIN_CVTMASK2Q128,
30008 IX86_BUILTIN_CVTMASK2Q256,
30009 IX86_BUILTIN_PCMPEQB128_MASK,
30010 IX86_BUILTIN_PCMPEQB256_MASK,
30011 IX86_BUILTIN_PCMPEQW128_MASK,
30012 IX86_BUILTIN_PCMPEQW256_MASK,
30013 IX86_BUILTIN_PCMPEQD128_MASK,
30014 IX86_BUILTIN_PCMPEQD256_MASK,
30015 IX86_BUILTIN_PCMPEQQ128_MASK,
30016 IX86_BUILTIN_PCMPEQQ256_MASK,
30017 IX86_BUILTIN_PCMPGTB128_MASK,
30018 IX86_BUILTIN_PCMPGTB256_MASK,
30019 IX86_BUILTIN_PCMPGTW128_MASK,
30020 IX86_BUILTIN_PCMPGTW256_MASK,
30021 IX86_BUILTIN_PCMPGTD128_MASK,
30022 IX86_BUILTIN_PCMPGTD256_MASK,
30023 IX86_BUILTIN_PCMPGTQ128_MASK,
30024 IX86_BUILTIN_PCMPGTQ256_MASK,
30025 IX86_BUILTIN_PTESTMB128,
30026 IX86_BUILTIN_PTESTMB256,
30027 IX86_BUILTIN_PTESTMW128,
30028 IX86_BUILTIN_PTESTMW256,
30029 IX86_BUILTIN_PTESTMD128,
30030 IX86_BUILTIN_PTESTMD256,
30031 IX86_BUILTIN_PTESTMQ128,
30032 IX86_BUILTIN_PTESTMQ256,
30033 IX86_BUILTIN_PTESTNMB128,
30034 IX86_BUILTIN_PTESTNMB256,
30035 IX86_BUILTIN_PTESTNMW128,
30036 IX86_BUILTIN_PTESTNMW256,
30037 IX86_BUILTIN_PTESTNMD128,
30038 IX86_BUILTIN_PTESTNMD256,
30039 IX86_BUILTIN_PTESTNMQ128,
30040 IX86_BUILTIN_PTESTNMQ256,
30041 IX86_BUILTIN_PBROADCASTMB128,
30042 IX86_BUILTIN_PBROADCASTMB256,
30043 IX86_BUILTIN_PBROADCASTMW128,
30044 IX86_BUILTIN_PBROADCASTMW256,
30045 IX86_BUILTIN_COMPRESSPD256,
30046 IX86_BUILTIN_COMPRESSPD128,
30047 IX86_BUILTIN_COMPRESSPS256,
30048 IX86_BUILTIN_COMPRESSPS128,
30049 IX86_BUILTIN_PCOMPRESSQ256,
30050 IX86_BUILTIN_PCOMPRESSQ128,
30051 IX86_BUILTIN_PCOMPRESSD256,
30052 IX86_BUILTIN_PCOMPRESSD128,
30053 IX86_BUILTIN_EXPANDPD256,
30054 IX86_BUILTIN_EXPANDPD128,
30055 IX86_BUILTIN_EXPANDPS256,
30056 IX86_BUILTIN_EXPANDPS128,
30057 IX86_BUILTIN_PEXPANDQ256,
30058 IX86_BUILTIN_PEXPANDQ128,
30059 IX86_BUILTIN_PEXPANDD256,
30060 IX86_BUILTIN_PEXPANDD128,
30061 IX86_BUILTIN_EXPANDPD256Z,
30062 IX86_BUILTIN_EXPANDPD128Z,
30063 IX86_BUILTIN_EXPANDPS256Z,
30064 IX86_BUILTIN_EXPANDPS128Z,
30065 IX86_BUILTIN_PEXPANDQ256Z,
30066 IX86_BUILTIN_PEXPANDQ128Z,
30067 IX86_BUILTIN_PEXPANDD256Z,
30068 IX86_BUILTIN_PEXPANDD128Z,
30069 IX86_BUILTIN_PMAXSD256_MASK,
30070 IX86_BUILTIN_PMINSD256_MASK,
30071 IX86_BUILTIN_PMAXUD256_MASK,
30072 IX86_BUILTIN_PMINUD256_MASK,
30073 IX86_BUILTIN_PMAXSD128_MASK,
30074 IX86_BUILTIN_PMINSD128_MASK,
30075 IX86_BUILTIN_PMAXUD128_MASK,
30076 IX86_BUILTIN_PMINUD128_MASK,
30077 IX86_BUILTIN_PMAXSQ256_MASK,
30078 IX86_BUILTIN_PMINSQ256_MASK,
30079 IX86_BUILTIN_PMAXUQ256_MASK,
30080 IX86_BUILTIN_PMINUQ256_MASK,
30081 IX86_BUILTIN_PMAXSQ128_MASK,
30082 IX86_BUILTIN_PMINSQ128_MASK,
30083 IX86_BUILTIN_PMAXUQ128_MASK,
30084 IX86_BUILTIN_PMINUQ128_MASK,
30085 IX86_BUILTIN_PMINSB256_MASK,
30086 IX86_BUILTIN_PMINUB256_MASK,
30087 IX86_BUILTIN_PMAXSB256_MASK,
30088 IX86_BUILTIN_PMAXUB256_MASK,
30089 IX86_BUILTIN_PMINSB128_MASK,
30090 IX86_BUILTIN_PMINUB128_MASK,
30091 IX86_BUILTIN_PMAXSB128_MASK,
30092 IX86_BUILTIN_PMAXUB128_MASK,
30093 IX86_BUILTIN_PMINSW256_MASK,
30094 IX86_BUILTIN_PMINUW256_MASK,
30095 IX86_BUILTIN_PMAXSW256_MASK,
30096 IX86_BUILTIN_PMAXUW256_MASK,
30097 IX86_BUILTIN_PMINSW128_MASK,
30098 IX86_BUILTIN_PMINUW128_MASK,
30099 IX86_BUILTIN_PMAXSW128_MASK,
30100 IX86_BUILTIN_PMAXUW128_MASK,
30101 IX86_BUILTIN_VPCONFLICTQ256,
30102 IX86_BUILTIN_VPCONFLICTD256,
30103 IX86_BUILTIN_VPCLZCNTQ256,
30104 IX86_BUILTIN_VPCLZCNTD256,
30105 IX86_BUILTIN_UNPCKHPD256_MASK,
30106 IX86_BUILTIN_UNPCKHPD128_MASK,
30107 IX86_BUILTIN_UNPCKHPS256_MASK,
30108 IX86_BUILTIN_UNPCKHPS128_MASK,
30109 IX86_BUILTIN_UNPCKLPD256_MASK,
30110 IX86_BUILTIN_UNPCKLPD128_MASK,
30111 IX86_BUILTIN_UNPCKLPS256_MASK,
30112 IX86_BUILTIN_VPCONFLICTQ128,
30113 IX86_BUILTIN_VPCONFLICTD128,
30114 IX86_BUILTIN_VPCLZCNTQ128,
30115 IX86_BUILTIN_VPCLZCNTD128,
30116 IX86_BUILTIN_UNPCKLPS128_MASK,
30117 IX86_BUILTIN_ALIGND256,
30118 IX86_BUILTIN_ALIGNQ256,
30119 IX86_BUILTIN_ALIGND128,
30120 IX86_BUILTIN_ALIGNQ128,
30121 IX86_BUILTIN_CVTPS2PH256_MASK,
30122 IX86_BUILTIN_CVTPS2PH_MASK,
30123 IX86_BUILTIN_CVTPH2PS_MASK,
30124 IX86_BUILTIN_CVTPH2PS256_MASK,
30125 IX86_BUILTIN_PUNPCKHDQ128_MASK,
30126 IX86_BUILTIN_PUNPCKHDQ256_MASK,
30127 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
30128 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
30129 IX86_BUILTIN_PUNPCKLDQ128_MASK,
30130 IX86_BUILTIN_PUNPCKLDQ256_MASK,
30131 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
30132 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
30133 IX86_BUILTIN_PUNPCKHBW128_MASK,
30134 IX86_BUILTIN_PUNPCKHBW256_MASK,
30135 IX86_BUILTIN_PUNPCKHWD128_MASK,
30136 IX86_BUILTIN_PUNPCKHWD256_MASK,
30137 IX86_BUILTIN_PUNPCKLBW128_MASK,
30138 IX86_BUILTIN_PUNPCKLBW256_MASK,
30139 IX86_BUILTIN_PUNPCKLWD128_MASK,
30140 IX86_BUILTIN_PUNPCKLWD256_MASK,
30141 IX86_BUILTIN_PSLLVV16HI,
30142 IX86_BUILTIN_PSLLVV8HI,
30143 IX86_BUILTIN_PACKSSDW256_MASK,
30144 IX86_BUILTIN_PACKSSDW128_MASK,
30145 IX86_BUILTIN_PACKUSDW256_MASK,
30146 IX86_BUILTIN_PACKUSDW128_MASK,
30147 IX86_BUILTIN_PAVGB256_MASK,
30148 IX86_BUILTIN_PAVGW256_MASK,
30149 IX86_BUILTIN_PAVGB128_MASK,
30150 IX86_BUILTIN_PAVGW128_MASK,
30151 IX86_BUILTIN_VPERMVARSF256_MASK,
30152 IX86_BUILTIN_VPERMVARDF256_MASK,
30153 IX86_BUILTIN_VPERMDF256_MASK,
30154 IX86_BUILTIN_PABSB256_MASK,
30155 IX86_BUILTIN_PABSB128_MASK,
30156 IX86_BUILTIN_PABSW256_MASK,
30157 IX86_BUILTIN_PABSW128_MASK,
30158 IX86_BUILTIN_VPERMILVARPD_MASK,
30159 IX86_BUILTIN_VPERMILVARPS_MASK,
30160 IX86_BUILTIN_VPERMILVARPD256_MASK,
30161 IX86_BUILTIN_VPERMILVARPS256_MASK,
30162 IX86_BUILTIN_VPERMILPD_MASK,
30163 IX86_BUILTIN_VPERMILPS_MASK,
30164 IX86_BUILTIN_VPERMILPD256_MASK,
30165 IX86_BUILTIN_VPERMILPS256_MASK,
30166 IX86_BUILTIN_BLENDMQ256,
30167 IX86_BUILTIN_BLENDMD256,
30168 IX86_BUILTIN_BLENDMPD256,
30169 IX86_BUILTIN_BLENDMPS256,
30170 IX86_BUILTIN_BLENDMQ128,
30171 IX86_BUILTIN_BLENDMD128,
30172 IX86_BUILTIN_BLENDMPD128,
30173 IX86_BUILTIN_BLENDMPS128,
30174 IX86_BUILTIN_BLENDMW256,
30175 IX86_BUILTIN_BLENDMB256,
30176 IX86_BUILTIN_BLENDMW128,
30177 IX86_BUILTIN_BLENDMB128,
30178 IX86_BUILTIN_PMULLD256_MASK,
30179 IX86_BUILTIN_PMULLD128_MASK,
30180 IX86_BUILTIN_PMULUDQ256_MASK,
30181 IX86_BUILTIN_PMULDQ256_MASK,
30182 IX86_BUILTIN_PMULDQ128_MASK,
30183 IX86_BUILTIN_PMULUDQ128_MASK,
30184 IX86_BUILTIN_CVTPD2PS256_MASK,
30185 IX86_BUILTIN_CVTPD2PS_MASK,
30186 IX86_BUILTIN_VPERMVARSI256_MASK,
30187 IX86_BUILTIN_VPERMVARDI256_MASK,
30188 IX86_BUILTIN_VPERMDI256_MASK,
30189 IX86_BUILTIN_CMPQ256,
30190 IX86_BUILTIN_CMPD256,
30191 IX86_BUILTIN_UCMPQ256,
30192 IX86_BUILTIN_UCMPD256,
30193 IX86_BUILTIN_CMPB256,
30194 IX86_BUILTIN_CMPW256,
30195 IX86_BUILTIN_UCMPB256,
30196 IX86_BUILTIN_UCMPW256,
30197 IX86_BUILTIN_CMPPD256_MASK,
30198 IX86_BUILTIN_CMPPS256_MASK,
30199 IX86_BUILTIN_CMPQ128,
30200 IX86_BUILTIN_CMPD128,
30201 IX86_BUILTIN_UCMPQ128,
30202 IX86_BUILTIN_UCMPD128,
30203 IX86_BUILTIN_CMPB128,
30204 IX86_BUILTIN_CMPW128,
30205 IX86_BUILTIN_UCMPB128,
30206 IX86_BUILTIN_UCMPW128,
30207 IX86_BUILTIN_CMPPD128_MASK,
30208 IX86_BUILTIN_CMPPS128_MASK,
30210 IX86_BUILTIN_GATHER3SIV8SF,
30211 IX86_BUILTIN_GATHER3SIV4SF,
30212 IX86_BUILTIN_GATHER3SIV4DF,
30213 IX86_BUILTIN_GATHER3SIV2DF,
30214 IX86_BUILTIN_GATHER3DIV8SF,
30215 IX86_BUILTIN_GATHER3DIV4SF,
30216 IX86_BUILTIN_GATHER3DIV4DF,
30217 IX86_BUILTIN_GATHER3DIV2DF,
30218 IX86_BUILTIN_GATHER3SIV8SI,
30219 IX86_BUILTIN_GATHER3SIV4SI,
30220 IX86_BUILTIN_GATHER3SIV4DI,
30221 IX86_BUILTIN_GATHER3SIV2DI,
30222 IX86_BUILTIN_GATHER3DIV8SI,
30223 IX86_BUILTIN_GATHER3DIV4SI,
30224 IX86_BUILTIN_GATHER3DIV4DI,
30225 IX86_BUILTIN_GATHER3DIV2DI,
30226 IX86_BUILTIN_SCATTERSIV8SF,
30227 IX86_BUILTIN_SCATTERSIV4SF,
30228 IX86_BUILTIN_SCATTERSIV4DF,
30229 IX86_BUILTIN_SCATTERSIV2DF,
30230 IX86_BUILTIN_SCATTERDIV8SF,
30231 IX86_BUILTIN_SCATTERDIV4SF,
30232 IX86_BUILTIN_SCATTERDIV4DF,
30233 IX86_BUILTIN_SCATTERDIV2DF,
30234 IX86_BUILTIN_SCATTERSIV8SI,
30235 IX86_BUILTIN_SCATTERSIV4SI,
30236 IX86_BUILTIN_SCATTERSIV4DI,
30237 IX86_BUILTIN_SCATTERSIV2DI,
30238 IX86_BUILTIN_SCATTERDIV8SI,
30239 IX86_BUILTIN_SCATTERDIV4SI,
30240 IX86_BUILTIN_SCATTERDIV4DI,
30241 IX86_BUILTIN_SCATTERDIV2DI,
30243 /* AVX512DQ. */
30244 IX86_BUILTIN_RANGESD128,
30245 IX86_BUILTIN_RANGESS128,
30246 IX86_BUILTIN_KUNPCKWD,
30247 IX86_BUILTIN_KUNPCKDQ,
30248 IX86_BUILTIN_BROADCASTF32x2_512,
30249 IX86_BUILTIN_BROADCASTI32x2_512,
30250 IX86_BUILTIN_BROADCASTF64X2_512,
30251 IX86_BUILTIN_BROADCASTI64X2_512,
30252 IX86_BUILTIN_BROADCASTF32X8_512,
30253 IX86_BUILTIN_BROADCASTI32X8_512,
30254 IX86_BUILTIN_EXTRACTF64X2_512,
30255 IX86_BUILTIN_EXTRACTF32X8,
30256 IX86_BUILTIN_EXTRACTI64X2_512,
30257 IX86_BUILTIN_EXTRACTI32X8,
30258 IX86_BUILTIN_REDUCEPD512_MASK,
30259 IX86_BUILTIN_REDUCEPS512_MASK,
30260 IX86_BUILTIN_PMULLQ512,
30261 IX86_BUILTIN_XORPD512,
30262 IX86_BUILTIN_XORPS512,
30263 IX86_BUILTIN_ORPD512,
30264 IX86_BUILTIN_ORPS512,
30265 IX86_BUILTIN_ANDPD512,
30266 IX86_BUILTIN_ANDPS512,
30267 IX86_BUILTIN_ANDNPD512,
30268 IX86_BUILTIN_ANDNPS512,
30269 IX86_BUILTIN_INSERTF32X8,
30270 IX86_BUILTIN_INSERTI32X8,
30271 IX86_BUILTIN_INSERTF64X2_512,
30272 IX86_BUILTIN_INSERTI64X2_512,
30273 IX86_BUILTIN_FPCLASSPD512,
30274 IX86_BUILTIN_FPCLASSPS512,
30275 IX86_BUILTIN_CVTD2MASK512,
30276 IX86_BUILTIN_CVTQ2MASK512,
30277 IX86_BUILTIN_CVTMASK2D512,
30278 IX86_BUILTIN_CVTMASK2Q512,
30279 IX86_BUILTIN_CVTPD2QQ512,
30280 IX86_BUILTIN_CVTPS2QQ512,
30281 IX86_BUILTIN_CVTPD2UQQ512,
30282 IX86_BUILTIN_CVTPS2UQQ512,
30283 IX86_BUILTIN_CVTQQ2PS512,
30284 IX86_BUILTIN_CVTUQQ2PS512,
30285 IX86_BUILTIN_CVTQQ2PD512,
30286 IX86_BUILTIN_CVTUQQ2PD512,
30287 IX86_BUILTIN_CVTTPS2QQ512,
30288 IX86_BUILTIN_CVTTPS2UQQ512,
30289 IX86_BUILTIN_CVTTPD2QQ512,
30290 IX86_BUILTIN_CVTTPD2UQQ512,
30291 IX86_BUILTIN_RANGEPS512,
30292 IX86_BUILTIN_RANGEPD512,
30294 /* AVX512BW. */
30295 IX86_BUILTIN_PACKUSDW512,
30296 IX86_BUILTIN_PACKSSDW512,
30297 IX86_BUILTIN_LOADDQUHI512_MASK,
30298 IX86_BUILTIN_LOADDQUQI512_MASK,
30299 IX86_BUILTIN_PSLLDQ512,
30300 IX86_BUILTIN_PSRLDQ512,
30301 IX86_BUILTIN_STOREDQUHI512_MASK,
30302 IX86_BUILTIN_STOREDQUQI512_MASK,
30303 IX86_BUILTIN_PALIGNR512,
30304 IX86_BUILTIN_PALIGNR512_MASK,
30305 IX86_BUILTIN_MOVDQUHI512_MASK,
30306 IX86_BUILTIN_MOVDQUQI512_MASK,
30307 IX86_BUILTIN_PSADBW512,
30308 IX86_BUILTIN_DBPSADBW512,
30309 IX86_BUILTIN_PBROADCASTB512,
30310 IX86_BUILTIN_PBROADCASTB512_GPR,
30311 IX86_BUILTIN_PBROADCASTW512,
30312 IX86_BUILTIN_PBROADCASTW512_GPR,
30313 IX86_BUILTIN_PMOVSXBW512_MASK,
30314 IX86_BUILTIN_PMOVZXBW512_MASK,
30315 IX86_BUILTIN_VPERMVARHI512_MASK,
30316 IX86_BUILTIN_VPERMT2VARHI512,
30317 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30318 IX86_BUILTIN_VPERMI2VARHI512,
30319 IX86_BUILTIN_PAVGB512,
30320 IX86_BUILTIN_PAVGW512,
30321 IX86_BUILTIN_PADDB512,
30322 IX86_BUILTIN_PSUBB512,
30323 IX86_BUILTIN_PSUBSB512,
30324 IX86_BUILTIN_PADDSB512,
30325 IX86_BUILTIN_PSUBUSB512,
30326 IX86_BUILTIN_PADDUSB512,
30327 IX86_BUILTIN_PSUBW512,
30328 IX86_BUILTIN_PADDW512,
30329 IX86_BUILTIN_PSUBSW512,
30330 IX86_BUILTIN_PADDSW512,
30331 IX86_BUILTIN_PSUBUSW512,
30332 IX86_BUILTIN_PADDUSW512,
30333 IX86_BUILTIN_PMAXUW512,
30334 IX86_BUILTIN_PMAXSW512,
30335 IX86_BUILTIN_PMINUW512,
30336 IX86_BUILTIN_PMINSW512,
30337 IX86_BUILTIN_PMAXUB512,
30338 IX86_BUILTIN_PMAXSB512,
30339 IX86_BUILTIN_PMINUB512,
30340 IX86_BUILTIN_PMINSB512,
30341 IX86_BUILTIN_PMOVWB512,
30342 IX86_BUILTIN_PMOVSWB512,
30343 IX86_BUILTIN_PMOVUSWB512,
30344 IX86_BUILTIN_PMULHRSW512_MASK,
30345 IX86_BUILTIN_PMULHUW512_MASK,
30346 IX86_BUILTIN_PMULHW512_MASK,
30347 IX86_BUILTIN_PMULLW512_MASK,
30348 IX86_BUILTIN_PSLLWI512_MASK,
30349 IX86_BUILTIN_PSLLW512_MASK,
30350 IX86_BUILTIN_PACKSSWB512,
30351 IX86_BUILTIN_PACKUSWB512,
30352 IX86_BUILTIN_PSRAVV32HI,
30353 IX86_BUILTIN_PMADDUBSW512_MASK,
30354 IX86_BUILTIN_PMADDWD512_MASK,
30355 IX86_BUILTIN_PSRLVV32HI,
30356 IX86_BUILTIN_PUNPCKHBW512,
30357 IX86_BUILTIN_PUNPCKHWD512,
30358 IX86_BUILTIN_PUNPCKLBW512,
30359 IX86_BUILTIN_PUNPCKLWD512,
30360 IX86_BUILTIN_PSHUFB512,
30361 IX86_BUILTIN_PSHUFHW512,
30362 IX86_BUILTIN_PSHUFLW512,
30363 IX86_BUILTIN_PSRAWI512,
30364 IX86_BUILTIN_PSRAW512,
30365 IX86_BUILTIN_PSRLWI512,
30366 IX86_BUILTIN_PSRLW512,
30367 IX86_BUILTIN_CVTB2MASK512,
30368 IX86_BUILTIN_CVTW2MASK512,
30369 IX86_BUILTIN_CVTMASK2B512,
30370 IX86_BUILTIN_CVTMASK2W512,
30371 IX86_BUILTIN_PCMPEQB512_MASK,
30372 IX86_BUILTIN_PCMPEQW512_MASK,
30373 IX86_BUILTIN_PCMPGTB512_MASK,
30374 IX86_BUILTIN_PCMPGTW512_MASK,
30375 IX86_BUILTIN_PTESTMB512,
30376 IX86_BUILTIN_PTESTMW512,
30377 IX86_BUILTIN_PTESTNMB512,
30378 IX86_BUILTIN_PTESTNMW512,
30379 IX86_BUILTIN_PSLLVV32HI,
30380 IX86_BUILTIN_PABSB512,
30381 IX86_BUILTIN_PABSW512,
30382 IX86_BUILTIN_BLENDMW512,
30383 IX86_BUILTIN_BLENDMB512,
30384 IX86_BUILTIN_CMPB512,
30385 IX86_BUILTIN_CMPW512,
30386 IX86_BUILTIN_UCMPB512,
30387 IX86_BUILTIN_UCMPW512,
30389 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30390 where all operands are 32-byte or 64-byte wide respectively. */
30391 IX86_BUILTIN_GATHERALTSIV4DF,
30392 IX86_BUILTIN_GATHERALTDIV8SF,
30393 IX86_BUILTIN_GATHERALTSIV4DI,
30394 IX86_BUILTIN_GATHERALTDIV8SI,
30395 IX86_BUILTIN_GATHER3ALTDIV16SF,
30396 IX86_BUILTIN_GATHER3ALTDIV16SI,
30397 IX86_BUILTIN_GATHER3ALTSIV4DF,
30398 IX86_BUILTIN_GATHER3ALTDIV8SF,
30399 IX86_BUILTIN_GATHER3ALTSIV4DI,
30400 IX86_BUILTIN_GATHER3ALTDIV8SI,
30401 IX86_BUILTIN_GATHER3ALTSIV8DF,
30402 IX86_BUILTIN_GATHER3ALTSIV8DI,
30403 IX86_BUILTIN_GATHER3DIV16SF,
30404 IX86_BUILTIN_GATHER3DIV16SI,
30405 IX86_BUILTIN_GATHER3DIV8DF,
30406 IX86_BUILTIN_GATHER3DIV8DI,
30407 IX86_BUILTIN_GATHER3SIV16SF,
30408 IX86_BUILTIN_GATHER3SIV16SI,
30409 IX86_BUILTIN_GATHER3SIV8DF,
30410 IX86_BUILTIN_GATHER3SIV8DI,
30411 IX86_BUILTIN_SCATTERDIV16SF,
30412 IX86_BUILTIN_SCATTERDIV16SI,
30413 IX86_BUILTIN_SCATTERDIV8DF,
30414 IX86_BUILTIN_SCATTERDIV8DI,
30415 IX86_BUILTIN_SCATTERSIV16SF,
30416 IX86_BUILTIN_SCATTERSIV16SI,
30417 IX86_BUILTIN_SCATTERSIV8DF,
30418 IX86_BUILTIN_SCATTERSIV8DI,
30420 /* AVX512PF */
30421 IX86_BUILTIN_GATHERPFQPD,
30422 IX86_BUILTIN_GATHERPFDPS,
30423 IX86_BUILTIN_GATHERPFDPD,
30424 IX86_BUILTIN_GATHERPFQPS,
30425 IX86_BUILTIN_SCATTERPFDPD,
30426 IX86_BUILTIN_SCATTERPFDPS,
30427 IX86_BUILTIN_SCATTERPFQPD,
30428 IX86_BUILTIN_SCATTERPFQPS,
30430 /* AVX-512ER */
30431 IX86_BUILTIN_EXP2PD_MASK,
30432 IX86_BUILTIN_EXP2PS_MASK,
30433 IX86_BUILTIN_EXP2PS,
30434 IX86_BUILTIN_RCP28PD,
30435 IX86_BUILTIN_RCP28PS,
30436 IX86_BUILTIN_RCP28SD,
30437 IX86_BUILTIN_RCP28SS,
30438 IX86_BUILTIN_RSQRT28PD,
30439 IX86_BUILTIN_RSQRT28PS,
30440 IX86_BUILTIN_RSQRT28SD,
30441 IX86_BUILTIN_RSQRT28SS,
30443 /* AVX-512IFMA */
30444 IX86_BUILTIN_VPMADD52LUQ512,
30445 IX86_BUILTIN_VPMADD52HUQ512,
30446 IX86_BUILTIN_VPMADD52LUQ256,
30447 IX86_BUILTIN_VPMADD52HUQ256,
30448 IX86_BUILTIN_VPMADD52LUQ128,
30449 IX86_BUILTIN_VPMADD52HUQ128,
30450 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30451 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30452 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30453 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30454 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30455 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30457 /* AVX-512VBMI */
30458 IX86_BUILTIN_VPMULTISHIFTQB512,
30459 IX86_BUILTIN_VPMULTISHIFTQB256,
30460 IX86_BUILTIN_VPMULTISHIFTQB128,
30461 IX86_BUILTIN_VPERMVARQI512_MASK,
30462 IX86_BUILTIN_VPERMT2VARQI512,
30463 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30464 IX86_BUILTIN_VPERMI2VARQI512,
30465 IX86_BUILTIN_VPERMVARQI256_MASK,
30466 IX86_BUILTIN_VPERMVARQI128_MASK,
30467 IX86_BUILTIN_VPERMT2VARQI256,
30468 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30469 IX86_BUILTIN_VPERMT2VARQI128,
30470 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30471 IX86_BUILTIN_VPERMI2VARQI256,
30472 IX86_BUILTIN_VPERMI2VARQI128,
30474 /* SHA builtins. */
30475 IX86_BUILTIN_SHA1MSG1,
30476 IX86_BUILTIN_SHA1MSG2,
30477 IX86_BUILTIN_SHA1NEXTE,
30478 IX86_BUILTIN_SHA1RNDS4,
30479 IX86_BUILTIN_SHA256MSG1,
30480 IX86_BUILTIN_SHA256MSG2,
30481 IX86_BUILTIN_SHA256RNDS2,
30483 /* CLWB instructions. */
30484 IX86_BUILTIN_CLWB,
30486 /* PCOMMIT instructions. */
30487 IX86_BUILTIN_PCOMMIT,
30489 /* CLFLUSHOPT instructions. */
30490 IX86_BUILTIN_CLFLUSHOPT,
30492 /* TFmode support builtins. */
30493 IX86_BUILTIN_INFQ,
30494 IX86_BUILTIN_HUGE_VALQ,
30495 IX86_BUILTIN_FABSQ,
30496 IX86_BUILTIN_COPYSIGNQ,
30498 /* Vectorizer support builtins. */
30499 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30500 IX86_BUILTIN_CPYSGNPS,
30501 IX86_BUILTIN_CPYSGNPD,
30502 IX86_BUILTIN_CPYSGNPS256,
30503 IX86_BUILTIN_CPYSGNPS512,
30504 IX86_BUILTIN_CPYSGNPD256,
30505 IX86_BUILTIN_CPYSGNPD512,
30506 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30507 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30510 /* FMA4 instructions. */
30511 IX86_BUILTIN_VFMADDSS,
30512 IX86_BUILTIN_VFMADDSD,
30513 IX86_BUILTIN_VFMADDPS,
30514 IX86_BUILTIN_VFMADDPD,
30515 IX86_BUILTIN_VFMADDPS256,
30516 IX86_BUILTIN_VFMADDPD256,
30517 IX86_BUILTIN_VFMADDSUBPS,
30518 IX86_BUILTIN_VFMADDSUBPD,
30519 IX86_BUILTIN_VFMADDSUBPS256,
30520 IX86_BUILTIN_VFMADDSUBPD256,
30522 /* FMA3 instructions. */
30523 IX86_BUILTIN_VFMADDSS3,
30524 IX86_BUILTIN_VFMADDSD3,
30526 /* XOP instructions. */
30527 IX86_BUILTIN_VPCMOV,
30528 IX86_BUILTIN_VPCMOV_V2DI,
30529 IX86_BUILTIN_VPCMOV_V4SI,
30530 IX86_BUILTIN_VPCMOV_V8HI,
30531 IX86_BUILTIN_VPCMOV_V16QI,
30532 IX86_BUILTIN_VPCMOV_V4SF,
30533 IX86_BUILTIN_VPCMOV_V2DF,
30534 IX86_BUILTIN_VPCMOV256,
30535 IX86_BUILTIN_VPCMOV_V4DI256,
30536 IX86_BUILTIN_VPCMOV_V8SI256,
30537 IX86_BUILTIN_VPCMOV_V16HI256,
30538 IX86_BUILTIN_VPCMOV_V32QI256,
30539 IX86_BUILTIN_VPCMOV_V8SF256,
30540 IX86_BUILTIN_VPCMOV_V4DF256,
30542 IX86_BUILTIN_VPPERM,
30544 IX86_BUILTIN_VPMACSSWW,
30545 IX86_BUILTIN_VPMACSWW,
30546 IX86_BUILTIN_VPMACSSWD,
30547 IX86_BUILTIN_VPMACSWD,
30548 IX86_BUILTIN_VPMACSSDD,
30549 IX86_BUILTIN_VPMACSDD,
30550 IX86_BUILTIN_VPMACSSDQL,
30551 IX86_BUILTIN_VPMACSSDQH,
30552 IX86_BUILTIN_VPMACSDQL,
30553 IX86_BUILTIN_VPMACSDQH,
30554 IX86_BUILTIN_VPMADCSSWD,
30555 IX86_BUILTIN_VPMADCSWD,
30557 IX86_BUILTIN_VPHADDBW,
30558 IX86_BUILTIN_VPHADDBD,
30559 IX86_BUILTIN_VPHADDBQ,
30560 IX86_BUILTIN_VPHADDWD,
30561 IX86_BUILTIN_VPHADDWQ,
30562 IX86_BUILTIN_VPHADDDQ,
30563 IX86_BUILTIN_VPHADDUBW,
30564 IX86_BUILTIN_VPHADDUBD,
30565 IX86_BUILTIN_VPHADDUBQ,
30566 IX86_BUILTIN_VPHADDUWD,
30567 IX86_BUILTIN_VPHADDUWQ,
30568 IX86_BUILTIN_VPHADDUDQ,
30569 IX86_BUILTIN_VPHSUBBW,
30570 IX86_BUILTIN_VPHSUBWD,
30571 IX86_BUILTIN_VPHSUBDQ,
30573 IX86_BUILTIN_VPROTB,
30574 IX86_BUILTIN_VPROTW,
30575 IX86_BUILTIN_VPROTD,
30576 IX86_BUILTIN_VPROTQ,
30577 IX86_BUILTIN_VPROTB_IMM,
30578 IX86_BUILTIN_VPROTW_IMM,
30579 IX86_BUILTIN_VPROTD_IMM,
30580 IX86_BUILTIN_VPROTQ_IMM,
30582 IX86_BUILTIN_VPSHLB,
30583 IX86_BUILTIN_VPSHLW,
30584 IX86_BUILTIN_VPSHLD,
30585 IX86_BUILTIN_VPSHLQ,
30586 IX86_BUILTIN_VPSHAB,
30587 IX86_BUILTIN_VPSHAW,
30588 IX86_BUILTIN_VPSHAD,
30589 IX86_BUILTIN_VPSHAQ,
30591 IX86_BUILTIN_VFRCZSS,
30592 IX86_BUILTIN_VFRCZSD,
30593 IX86_BUILTIN_VFRCZPS,
30594 IX86_BUILTIN_VFRCZPD,
30595 IX86_BUILTIN_VFRCZPS256,
30596 IX86_BUILTIN_VFRCZPD256,
30598 IX86_BUILTIN_VPCOMEQUB,
30599 IX86_BUILTIN_VPCOMNEUB,
30600 IX86_BUILTIN_VPCOMLTUB,
30601 IX86_BUILTIN_VPCOMLEUB,
30602 IX86_BUILTIN_VPCOMGTUB,
30603 IX86_BUILTIN_VPCOMGEUB,
30604 IX86_BUILTIN_VPCOMFALSEUB,
30605 IX86_BUILTIN_VPCOMTRUEUB,
30607 IX86_BUILTIN_VPCOMEQUW,
30608 IX86_BUILTIN_VPCOMNEUW,
30609 IX86_BUILTIN_VPCOMLTUW,
30610 IX86_BUILTIN_VPCOMLEUW,
30611 IX86_BUILTIN_VPCOMGTUW,
30612 IX86_BUILTIN_VPCOMGEUW,
30613 IX86_BUILTIN_VPCOMFALSEUW,
30614 IX86_BUILTIN_VPCOMTRUEUW,
30616 IX86_BUILTIN_VPCOMEQUD,
30617 IX86_BUILTIN_VPCOMNEUD,
30618 IX86_BUILTIN_VPCOMLTUD,
30619 IX86_BUILTIN_VPCOMLEUD,
30620 IX86_BUILTIN_VPCOMGTUD,
30621 IX86_BUILTIN_VPCOMGEUD,
30622 IX86_BUILTIN_VPCOMFALSEUD,
30623 IX86_BUILTIN_VPCOMTRUEUD,
30625 IX86_BUILTIN_VPCOMEQUQ,
30626 IX86_BUILTIN_VPCOMNEUQ,
30627 IX86_BUILTIN_VPCOMLTUQ,
30628 IX86_BUILTIN_VPCOMLEUQ,
30629 IX86_BUILTIN_VPCOMGTUQ,
30630 IX86_BUILTIN_VPCOMGEUQ,
30631 IX86_BUILTIN_VPCOMFALSEUQ,
30632 IX86_BUILTIN_VPCOMTRUEUQ,
30634 IX86_BUILTIN_VPCOMEQB,
30635 IX86_BUILTIN_VPCOMNEB,
30636 IX86_BUILTIN_VPCOMLTB,
30637 IX86_BUILTIN_VPCOMLEB,
30638 IX86_BUILTIN_VPCOMGTB,
30639 IX86_BUILTIN_VPCOMGEB,
30640 IX86_BUILTIN_VPCOMFALSEB,
30641 IX86_BUILTIN_VPCOMTRUEB,
30643 IX86_BUILTIN_VPCOMEQW,
30644 IX86_BUILTIN_VPCOMNEW,
30645 IX86_BUILTIN_VPCOMLTW,
30646 IX86_BUILTIN_VPCOMLEW,
30647 IX86_BUILTIN_VPCOMGTW,
30648 IX86_BUILTIN_VPCOMGEW,
30649 IX86_BUILTIN_VPCOMFALSEW,
30650 IX86_BUILTIN_VPCOMTRUEW,
30652 IX86_BUILTIN_VPCOMEQD,
30653 IX86_BUILTIN_VPCOMNED,
30654 IX86_BUILTIN_VPCOMLTD,
30655 IX86_BUILTIN_VPCOMLED,
30656 IX86_BUILTIN_VPCOMGTD,
30657 IX86_BUILTIN_VPCOMGED,
30658 IX86_BUILTIN_VPCOMFALSED,
30659 IX86_BUILTIN_VPCOMTRUED,
30661 IX86_BUILTIN_VPCOMEQQ,
30662 IX86_BUILTIN_VPCOMNEQ,
30663 IX86_BUILTIN_VPCOMLTQ,
30664 IX86_BUILTIN_VPCOMLEQ,
30665 IX86_BUILTIN_VPCOMGTQ,
30666 IX86_BUILTIN_VPCOMGEQ,
30667 IX86_BUILTIN_VPCOMFALSEQ,
30668 IX86_BUILTIN_VPCOMTRUEQ,
30670 /* LWP instructions. */
30671 IX86_BUILTIN_LLWPCB,
30672 IX86_BUILTIN_SLWPCB,
30673 IX86_BUILTIN_LWPVAL32,
30674 IX86_BUILTIN_LWPVAL64,
30675 IX86_BUILTIN_LWPINS32,
30676 IX86_BUILTIN_LWPINS64,
30678 IX86_BUILTIN_CLZS,
30680 /* RTM */
30681 IX86_BUILTIN_XBEGIN,
30682 IX86_BUILTIN_XEND,
30683 IX86_BUILTIN_XABORT,
30684 IX86_BUILTIN_XTEST,
30686 /* MPX */
30687 IX86_BUILTIN_BNDMK,
30688 IX86_BUILTIN_BNDSTX,
30689 IX86_BUILTIN_BNDLDX,
30690 IX86_BUILTIN_BNDCL,
30691 IX86_BUILTIN_BNDCU,
30692 IX86_BUILTIN_BNDRET,
30693 IX86_BUILTIN_BNDNARROW,
30694 IX86_BUILTIN_BNDINT,
30695 IX86_BUILTIN_SIZEOF,
30696 IX86_BUILTIN_BNDLOWER,
30697 IX86_BUILTIN_BNDUPPER,
30699 /* BMI instructions. */
30700 IX86_BUILTIN_BEXTR32,
30701 IX86_BUILTIN_BEXTR64,
30702 IX86_BUILTIN_CTZS,
30704 /* TBM instructions. */
30705 IX86_BUILTIN_BEXTRI32,
30706 IX86_BUILTIN_BEXTRI64,
30708 /* BMI2 instructions. */
30709 IX86_BUILTIN_BZHI32,
30710 IX86_BUILTIN_BZHI64,
30711 IX86_BUILTIN_PDEP32,
30712 IX86_BUILTIN_PDEP64,
30713 IX86_BUILTIN_PEXT32,
30714 IX86_BUILTIN_PEXT64,
30716 /* ADX instructions. */
30717 IX86_BUILTIN_ADDCARRYX32,
30718 IX86_BUILTIN_ADDCARRYX64,
30720 /* SBB instructions. */
30721 IX86_BUILTIN_SBB32,
30722 IX86_BUILTIN_SBB64,
30724 /* FSGSBASE instructions. */
30725 IX86_BUILTIN_RDFSBASE32,
30726 IX86_BUILTIN_RDFSBASE64,
30727 IX86_BUILTIN_RDGSBASE32,
30728 IX86_BUILTIN_RDGSBASE64,
30729 IX86_BUILTIN_WRFSBASE32,
30730 IX86_BUILTIN_WRFSBASE64,
30731 IX86_BUILTIN_WRGSBASE32,
30732 IX86_BUILTIN_WRGSBASE64,
30734 /* RDRND instructions. */
30735 IX86_BUILTIN_RDRAND16_STEP,
30736 IX86_BUILTIN_RDRAND32_STEP,
30737 IX86_BUILTIN_RDRAND64_STEP,
30739 /* RDSEED instructions. */
30740 IX86_BUILTIN_RDSEED16_STEP,
30741 IX86_BUILTIN_RDSEED32_STEP,
30742 IX86_BUILTIN_RDSEED64_STEP,
30744 /* F16C instructions. */
30745 IX86_BUILTIN_CVTPH2PS,
30746 IX86_BUILTIN_CVTPH2PS256,
30747 IX86_BUILTIN_CVTPS2PH,
30748 IX86_BUILTIN_CVTPS2PH256,
30750 /* MONITORX and MWAITX instructions. */
30751 IX86_BUILTIN_MONITORX,
30752 IX86_BUILTIN_MWAITX,
30754 /* CFString built-in for darwin */
30755 IX86_BUILTIN_CFSTRING,
30757 /* Builtins to get CPU type and supported features. */
30758 IX86_BUILTIN_CPU_INIT,
30759 IX86_BUILTIN_CPU_IS,
30760 IX86_BUILTIN_CPU_SUPPORTS,
30762 /* Read/write FLAGS register built-ins. */
30763 IX86_BUILTIN_READ_FLAGS,
30764 IX86_BUILTIN_WRITE_FLAGS,
30766 IX86_BUILTIN_MAX
30767 };
30769 /* Table for the ix86 builtin decls. */
30770 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30772 /* Table of all of the builtin functions that are possible with different ISAs
30773 but are waiting to be built until a function is declared to use that
30774 ISA. */
30775 struct builtin_isa {
30776 const char *name; /* function name */
30777 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30778 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30779 bool const_p; /* true if the declaration is constant */
30780 bool leaf_p; /* true if the declaration has leaf attribute */
30781 bool nothrow_p; /* true if the declaration has nothrow attribute */
30782 bool set_and_not_built_p; /* true if the builtin is recorded but not yet built */
30783 };
30785 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30787 /* Bits that can still enable any inclusion of a builtin. */
30788 static HOST_WIDE_INT deferred_isa_values = 0;
30790 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30791 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30792 function decl in the ix86_builtins array. Returns the function decl or
30793 NULL_TREE, if the builtin was not added.
30795 If the front end has a special hook for builtin functions, delay adding
30796 builtin functions that aren't in the current ISA until the ISA is changed
30797 with function specific optimization. Doing so can save about 300K for the
30798 default compiler. When the builtin is expanded, check at that time whether
30799 it is valid.
30801 If the front end doesn't have a special hook, record all builtins, even if
30802 they aren't in the current ISA, in case the user uses
30803 function specific options for a different ISA, so that we don't get scope
30804 errors if a builtin is added in the middle of a function scope. */
30806 static inline tree
30807 def_builtin (HOST_WIDE_INT mask, const char *name,
30808 enum ix86_builtin_func_type tcode,
30809 enum ix86_builtins code)
30810 {
30811 tree decl = NULL_TREE;
30813 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30814 {
30815 ix86_builtins_isa[(int) code].isa = mask;
30817 mask &= ~OPTION_MASK_ISA_64BIT;
30818 if (mask == 0
30819 || (mask & ix86_isa_flags) != 0
30820 || (lang_hooks.builtin_function
30821 == lang_hooks.builtin_function_ext_scope))
30823 {
30824 tree type = ix86_get_builtin_func_type (tcode);
30825 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30826 NULL, NULL_TREE);
30827 ix86_builtins[(int) code] = decl;
30828 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30829 }
30830 else
30831 {
30832 /* Only a MASK whose set_and_not_built_p is true can still cause a
30833 builtin to be included later. */
30834 deferred_isa_values |= mask;
30835 ix86_builtins[(int) code] = NULL_TREE;
30836 ix86_builtins_isa[(int) code].tcode = tcode;
30837 ix86_builtins_isa[(int) code].name = name;
30838 ix86_builtins_isa[(int) code].leaf_p = false;
30839 ix86_builtins_isa[(int) code].nothrow_p = false;
30840 ix86_builtins_isa[(int) code].const_p = false;
30841 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30842 }
30843 }
30845 return decl;
30846 }
30848 /* Like def_builtin, but also marks the function decl "const". */
30850 static inline tree
30851 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30852 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30853 {
30854 tree decl = def_builtin (mask, name, tcode, code);
30855 if (decl)
30856 TREE_READONLY (decl) = 1;
30857 else
30858 ix86_builtins_isa[(int) code].const_p = true;
30860 return decl;
30861 }
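/* Editor's illustration (not part of the upstream source): a typical
   registration through the helpers above would look roughly like the call
   below; the builtin name, function type and enum value here are
   hypothetical placeholders rather than declarations made by this file.

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                        INT_FTYPE_V4SF_V4SF, IX86_BUILTIN_EXAMPLE);

   If the requested ISA bits are already in ix86_isa_flags (or the front
   end registers builtins at extended scope), the decl is created at once
   and stored in ix86_builtins[]; otherwise only ix86_builtins_isa[] and
   deferred_isa_values are updated, and the decl is built later by
   ix86_add_new_builtins.  */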
30863 /* Add any new builtin functions for a given ISA that may not have been
30864 declared. This saves a bit of space compared to adding all of the
30865 declarations to the tree, even if we didn't use them. */
30867 static void
30868 ix86_add_new_builtins (HOST_WIDE_INT isa)
30869 {
30870 if ((isa & deferred_isa_values) == 0)
30871 return;
30873 /* Bits in ISA value can be removed from potential isa values. */
30874 deferred_isa_values &= ~isa;
30876 int i;
30877 tree saved_current_target_pragma = current_target_pragma;
30878 current_target_pragma = NULL_TREE;
30880 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30881 {
30882 if ((ix86_builtins_isa[i].isa & isa) != 0
30883 && ix86_builtins_isa[i].set_and_not_built_p)
30884 {
30885 tree decl, type;
30887 /* Don't define the builtin again. */
30888 ix86_builtins_isa[i].set_and_not_built_p = false;
30890 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30891 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30892 type, i, BUILT_IN_MD, NULL,
30893 NULL_TREE);
30895 ix86_builtins[i] = decl;
30896 if (ix86_builtins_isa[i].const_p)
30897 TREE_READONLY (decl) = 1;
30898 if (ix86_builtins_isa[i].leaf_p)
30899 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30900 NULL_TREE);
30901 if (ix86_builtins_isa[i].nothrow_p)
30902 TREE_NOTHROW (decl) = 1;
30903 }
30904 }
30906 current_target_pragma = saved_current_target_pragma;
30907 }
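/* Editor's illustration (not part of the upstream source): the deferral
   machinery above is driven from the option-switching code.  When a
   function-specific target option enables additional ISA bits, a caller
   elsewhere in this file is expected to do something like

     ix86_add_new_builtins (ix86_isa_flags);

   so that builtins recorded with set_and_not_built_p by def_builtin are
   turned into real declarations.  The exact call site is outside this
   excerpt.  */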
30909 /* Bits for builtin_description.flag. */
30911 /* Set when we don't support the comparison natively, and should
30912 swap_comparison in order to support it. */
30913 #define BUILTIN_DESC_SWAP_OPERANDS 1
30915 struct builtin_description
30916 {
30917 const HOST_WIDE_INT mask;
30918 const enum insn_code icode;
30919 const char *const name;
30920 const enum ix86_builtins code;
30921 const enum rtx_code comparison;
30922 const int flag;
30923 };
30925 static const struct builtin_description bdesc_comi[] =
30926 {
30927 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30928 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30951 };
30953 static const struct builtin_description bdesc_pcmpestr[] =
30954 {
30955 /* SSE4.2 */
30956 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30958 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30960 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30963 };
30965 static const struct builtin_description bdesc_pcmpistr[] =
30966 {
30967 /* SSE4.2 */
30968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30975 };
30977 /* Special builtins with variable number of arguments. */
30978 static const struct builtin_description bdesc_special_args[] =
30979 {
30980 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30981 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30982 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30984 /* 80387 (for use internally for atomic compound assignment). */
30985 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30986 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30987 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30988 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30990 /* MMX */
30991 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30993 /* 3DNow! */
30994 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30996 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30997 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30998 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30999 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31000 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31001 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31002 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31003 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31004 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31006 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31007 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31008 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31009 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31010 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31011 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31012 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31013 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31015 /* SSE */
31016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31025 /* SSE or 3DNow!A */
31026 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31027 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
31029 /* SSE2 */
31030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
31034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
31036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
31037 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
31038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
31039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31044 /* SSE3 */
31045 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31047 /* SSE4.1 */
31048 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
31050 /* SSE4A */
31051 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31052 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31054 /* AVX */
31055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
31056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
31058 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31059 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31060 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
31062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
31064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
31070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
31073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
31077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
31078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
31079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
31080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
31081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
31082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
31083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
31085 /* AVX2 */
31086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
31087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
31088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
31089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
31090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
31091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
31092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
31093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
31094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
31096 /* AVX512F */
31097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
31118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
31119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
31120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
31121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31145 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
31146 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
31147 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
31148 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
31149 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
31150 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
31152 /* FSGSBASE */
31153 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31154 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31155 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31156 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31157 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31158 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31159 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31162 /* RTM */
31163 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31164 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31165 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31167 /* AVX512BW */
31168 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
31169 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31170 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31171 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31173 /* AVX512VL */
31174 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31175 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31176 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31210 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31211 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31212 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31213 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31269 /* PCOMMIT. */
31270 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31271 };
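/* Each row in these tables is a builtin_description: the ISA mask that must
   be enabled, the insn pattern to expand, the user-visible builtin name, its
   IX86_BUILTIN_* enumerator, an optional comparison code, and an *_FTYPE_*
   flag that selects the prototype.  As a hedged, illustrative sketch (the
   _mm256_mask_loadu_pd wrapper and its header are assumptions outside this
   file), the "__builtin_ia32_loadupd256_mask" row above is the descriptor
   behind an AVX-512VL masked unaligned load:

       #include <immintrin.h>

       // Compile with -mavx512vl; unmasked lanes keep the values from SRC.
       __m256d
       masked_loadu_pd (__m256d src, __mmask8 m, const double *p)
       {
         return _mm256_mask_loadu_pd (src, m, p);
       }

   Its V4DF_FTYPE_PCV4DF_V4DF_QI flag gives the builtin the
   v4df (const v4df *, v4df, 8-bit mask) prototype.  */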
31273 /* Builtins with variable number of arguments. */
31274 static const struct builtin_description bdesc_args[] =
31275 {
31276 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31277 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31278 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31279 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31280 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31281 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31282 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31284 /* MMX */
31285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31287 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31292 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31295 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31298 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
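/* Hedged usage sketch for the MMX rows above (the <mmintrin.h> wrapper name
   is an assumption; only the builtin itself comes from this table):
   "__builtin_ia32_paddb" backs the byte-wise add, and its
   V8QI_FTYPE_V8QI_V8QI flag selects the v8qi (v8qi, v8qi) prototype.

       #include <mmintrin.h>

       // Compile with -mmmx; adds eight packed bytes (PADDB).
       __m64
       add_bytes (__m64 a, __m64 b)
       {
         return _mm_add_pi8 (a, b);
       }
*/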
31348 /* 3DNow! */
31349 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31350 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31351 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31352 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31354 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31355 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31356 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31357 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31358 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31359 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31360 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31361 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31362 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31363 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31364 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31365 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31366 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31367 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31368 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31370 /* 3DNow!A */
31371 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31372 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31373 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31374 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31375 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31376 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
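/* Hedged sketch for the 3DNow! rows: "__builtin_ia32_pfadd" adds two V2SF
   (two-float MMX) vectors, per its V2SF_FTYPE_V2SF_V2SF flag.  The
   <mm3dnow.h> wrapper used below is an assumption outside this file.

       #include <mm3dnow.h>

       // Compile with -m3dnow; packed single-precision add (PFADD).
       __m64
       pfadd_pair (__m64 a, __m64 b)
       {
         return _m_pfadd (a, b);
       }
*/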
31378 /* SSE */
31379 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31380 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31381 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31382 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31383 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31384 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31385 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31386 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31387 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31388 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31389 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31390 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31392 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31394 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31395 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31396 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31397 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31407 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31424 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31425 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31429 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31431 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31432 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31434 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31439 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31440 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31444 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31446 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31452 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31453 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
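/* Note on the comparison rows above: entries whose flag carries the _SWAP
   suffix implement the "missing" predicate by exchanging operands, e.g. the
   CMPGTPS row pairs comparison code LT with V4SF_FTYPE_V4SF_V4SF_SWAP, so
   a > b is emitted as CMPLTPS with the operands reversed.  A hedged usage
   sketch (the <xmmintrin.h> wrapper name is an assumption):

       #include <xmmintrin.h>

       // Compile with -msse; per-lane a > b mask via swapped CMPLTPS.
       __m128
       greater_mask (__m128 a, __m128 b)
       {
         return _mm_cmpgt_ps (a, b);
       }
*/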
31455 /* SSE MMX or 3DNow!A */
31456 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31457 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31458 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31460 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31461 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31462 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31463 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31465 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31466 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31468 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
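/* The rows in this group are MMX-register integer ops that need either SSE
   or 3DNow!A, hence the OR'd ISA mask.  Hedged sketch (the <xmmintrin.h>
   wrapper name is an assumption):

       #include <xmmintrin.h>

       // Compile with -msse (or -m3dnowa); rounded unsigned byte average,
       // which reaches __builtin_ia32_pavgb from the table above.
       __m64
       avg_bytes (__m64 a, __m64 b)
       {
         return _mm_avg_pu8 (a, b);
       }
*/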
31470 /* SSE2 */
31471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31477 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31489 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31490 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31494 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31496 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31497 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31498 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31499 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31527 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31531 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31533 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31534 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31536 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31539 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31540 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31542 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31544 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31545 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31546 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31547 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31548 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31549 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31550 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31551 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31562 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31563 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31565 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31567 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31568 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31580 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31581 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31582 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31585 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31586 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31587 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31588 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31589 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31590 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31591 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31592 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31598 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31602 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31607 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31612 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31613 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31614 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31615 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31616 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31617 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31620 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31621 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31622 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31623 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31624 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31625 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31627 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31628 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31629 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31630 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
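/* Hedged sketch for the SSE2 rows (the <emmintrin.h> wrapper name is an
   assumption): "__builtin_ia32_paddd128" backs the packed 32-bit add, and
   its V4SI_FTYPE_V4SI_V4SI flag selects the v4si (v4si, v4si) prototype.

       #include <emmintrin.h>

       // Compile with -msse2; adds four packed 32-bit integers (PADDD).
       __m128i
       add_dwords (__m128i a, __m128i b)
       {
         return _mm_add_epi32 (a, b);
       }
*/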
31640 /* SSE2 MMX */
31641 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31642 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31644 /* SSE3 */
31645 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31646 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31648 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31649 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31650 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31651 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31652 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31653 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31655 /* SSSE3 */
31656 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31657 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31658 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31659 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31660 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31661 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31663 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31664 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31665 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31666 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31667 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31668 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31669 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31670 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31671 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31672 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31673 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31674 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31675 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31676 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31677 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31678 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31679 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31680 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31681 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31682 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31683 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31684 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31685 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31686 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31688 /* SSSE3. */
31689 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31690 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
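/* The _INT_CONVERT suffix on the palignr and pslldq/psrldq entries seems
   to indicate that the builtin prototype's vector mode differs from the
   mode of the insn pattern (a full-width TImode/DImode shift here), so the
   expander reinterprets the operands in the pattern's mode.  */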
31692 /* SSE4.1 */
31693 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31694 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31695 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31696 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31697 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31698 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31699 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31700 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31701 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31702 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31704 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31705 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31706 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31707 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31708 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31709 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31710 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31711 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31713 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31714 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31715 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31716 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31718 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31719 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31720 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31721 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31723 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31724 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31725 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31727 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31728 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31729 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31731 /* SSE4.1 */
31732 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31733 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31734 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31735 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
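/* For the floor/ceil/trunc/rint variants that follow, the comparison slot
   is apparently reused to carry a ROUND_* rounding-mode constant, which
   ix86_expand_sse_round then emits as the immediate operand of the
   underlying roundpd/roundps pattern.  */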
31737 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31738 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31739 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31740 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31742 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31743 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31745 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31746 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31748 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31749 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31750 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31751 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31753 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31754 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31756 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31757 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31759 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31760 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31761 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
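/* For the ptest entries above (and the vtest/ptest256 entries further
   down), the comparison code appears to select which flag the expander
   tests after the PTEST: EQ for ZF (testz), LTU for CF (testc), and GTU
   for neither flag set (testnzc).  */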
31763 /* SSE4.2 */
31764 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31765 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31766 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31767 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31768 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31770 /* SSE4A */
31771 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31772 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31773 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31774 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31776 /* AES */
31777 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31778 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31780 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31781 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31782 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31783 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31785 /* PCLMUL */
31786 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
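/* The AES and PCLMUL rows carry a null builtin name; the user-visible
   builtins are presumably registered separately (they additionally depend
   on OPTION_MASK_ISA_AES / OPTION_MASK_ISA_PCLMUL), while these entries
   still drive expansion via their IX86_BUILTIN_* codes.  */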
31788 /* AVX */
31789 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31790 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31793 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31794 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31797 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31803 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31804 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31805 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31806 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31807 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31808 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31809 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31810 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31811 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31812 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31813 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31814 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31837 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31838 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31842 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31844 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31860 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31862 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31864 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31876 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31877 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31890 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31891 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31901 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31902 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31903 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31924 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31925 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31927 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
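/* The copysignps256/copysignpd256 builtins above do not correspond to an
   intrinsic in the AVX headers; they appear to back the vectorizer's
   copysign/copysignf expansion (see ix86_builtin_vectorized_function).  */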
31929 /* AVX2 */
31930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31931 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31932 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31933 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31938 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31939 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31940 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31941 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31947 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31969 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31970 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31971 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31972 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31973 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31974 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31975 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31976 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31977 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31978 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31979 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31980 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31984 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31989 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31990 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31991 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31992 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31993 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31994 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31995 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31996 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31997 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31998 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31999 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32000 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
32001 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32002 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
32003 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32004 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
32005 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
32006 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
32007 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32008 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32009 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32010 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32011 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32012 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32013 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32014 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32015 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32016 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32017 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32018 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32019 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32020 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32022 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32023 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32024 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32025 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32026 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32027 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32028 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32029 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32030 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32031 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32034 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32036 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32037 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32038 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32039 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32040 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32041 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32042 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32043 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32044 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32045 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32046 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
32047 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
32048 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32049 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32050 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
32051 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
32052 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
32053 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
32054 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32055 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
32056 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
32057 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
32058 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
32059 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32060 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
32061 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
32062 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
32063 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
32064 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
32065 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
32066 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32067 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32068 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32069 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32071 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32073 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32074 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32075 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32077 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
32079 /* BMI */
32080 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32081 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32082 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
32084 /* TBM */
32085 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32086 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32088 /* F16C */
32089 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
32090 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
32091 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
32092 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
32094 /* BMI2 */
32095 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32096 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32097 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32098 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32099 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32100 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32102 /* AVX512F */
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
32105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
32106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
32107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
32117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
32127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
32129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
32134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
32135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
32137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
32138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
32139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
32140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
32141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
32158 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
32159 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
32160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
32162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32269 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32270 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32271 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32272 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32304 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32309 /* Mask arithmetic operations */
32310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
32321 /* SHA */
32322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32330 /* AVX512VL. */
32331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32332 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32341 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32343 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32344 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32369 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32370 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32371 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32372 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32373 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32374 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32375 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32376 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32377 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32378 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32379 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32380 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32381 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32386 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32387 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32388 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32389 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32390 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32391 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32392 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32393 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32394 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32395 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32398 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32399 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32400 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32401 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32422 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32423 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32426 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32427 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32441 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32442 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
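  /* Masked sign/zero extension (vpmovsx* / vpmovzx*).  The source type in
     each FTYPE is the full 128-bit vector (e.g. V16QI) even where only its
     low elements are actually consumed.  */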
32445 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32446 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32457 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32458 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32469 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32470 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32471 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32472 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32473 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32474 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32475 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32476 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32477 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32478 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32508 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32523 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32524 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
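  /* A rough usage sketch (illustrative only; the variable names are made up):
     user code normally reaches these masked builtins through the intrinsic
     wrappers, which pass the merge source and the write mask as the last two
     arguments, e.g.

	 __m256i r = (__m256i) __builtin_ia32_paddd256_mask ((__v8si) a,
							     (__v8si) b,
							     (__v8si) src,
							     (__mmask8) m);

     matching the V8SI_FTYPE_V8SI_V8SI_V8SI_QI signature recorded above.  */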
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
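  /* Down-converting moves (vpmov*).  The plain "truncate" rows simply drop
     the high bits, while the "ss_truncate" and "us_truncate" rows saturate
     the signed resp. unsigned value into the narrower element type.  */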
32531 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32532 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32533 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32534 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32535 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32536 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32567 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32568 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32569 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32570 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32587 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32588 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32589 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32590 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32591 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32592 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32593 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32594 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32595 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32596 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32597 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32598 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32599 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32600 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32601 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32602 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32603 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32604 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
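  /* Masked shifts.  Each operation appears twice with the same named
     pattern: once with an immediate count (the "...i" builtins, INT in the
     FTYPE) and once taking the count from a vector operand; the builtin
     expander distinguishes the two forms by the recorded function type.  */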
32605 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32649 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32650 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32651 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32652 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
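  /* Masked FMA variants: "_mask" keeps masked-off elements from the first
     source operand, "_mask3" keeps them from the third (addend) operand, and
     "_maskz" zeroes them.  */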
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32713 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32714 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32715 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32716 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32717 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32718 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32719 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32720 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32721 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32722 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32727 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32728 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32729 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32730 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32741 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32742 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32743 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32744 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32745 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32746 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32747 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32748 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32773 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32774 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32775 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32776 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32777 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32778 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32803 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32808 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32809 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32810 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32811 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32812 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32821 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32822 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32823 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32824 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32825 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32826 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32827 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32828 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32829 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32830 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32831 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32832 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32833 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32834 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32835 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32836 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32837 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32838 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32839 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32840 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32841 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32842 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32843 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32844 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32845 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32851 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32852 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32853 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32854 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32859 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32860 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32861 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32862 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32867 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32868 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32869 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32870 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32875 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32876 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32877 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32878 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32919 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32920 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32921 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32922 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32923 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32924 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32925 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32926 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32927 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32928 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32929 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32930 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32931 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32932 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32933 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32934 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32935 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32936 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32937 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32938 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32946 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32947 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32948 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32949 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32967 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32968 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32969 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32970 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32971 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32972 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32973 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32974 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32975 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32976 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32977 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32978 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32979 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32980 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32981 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32982 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32983 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32984 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32988 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32989 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32990 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32991 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
33000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
33001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
33002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
33003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
33004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
33005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
33006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
33007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
33008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
33009 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
33010 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
33011 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
33012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
33013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
33014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
33015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
33016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
33017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
33018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
33019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
33020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
33021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
33023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33027 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33028 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33030 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
33032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
33033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33037 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33038 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33039 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33040 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
33042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
33044 /* AVX512DQ. */
33045 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
33046 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
33047 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
33048 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
33049 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
33050 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
33051 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
33052 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
33053 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
33054 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
33055 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
33056 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
33057 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33058 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33059 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33060 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33061 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33062 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33063 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33064 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
33065 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33066 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
33067 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
33068 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
33069 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
33070 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
33071 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
33072 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
33073 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
33074 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
33075 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
33077 /* AVX512BW. */
33078 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
33079 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
33080 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33081 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33082 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33083 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33084 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
33085 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
33086 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33087 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33088 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
33089 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
33090 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
33091 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
33092 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
33093 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
33094 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33095 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33096 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33097 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33098 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33099 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33100 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33101 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33102 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33103 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33104 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33105 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33106 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33107 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33108 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33109 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33110 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33111 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33112 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33113 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33114 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33115 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33116 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33117 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33118 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33119 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33120 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33121 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33122 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33123 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33124 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33125 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33126 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33127 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33128 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33129 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33130 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33131 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33132 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33133 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33134 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
33135 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
33136 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33137 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33138 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33139 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33140 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33141 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33142 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33143 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33144 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33145 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33146 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33147 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33148 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
33149 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
33150 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
33151 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
33152 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33153 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33154 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33155 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33156 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33157 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33158 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33159 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33160 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33161 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33162 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33163 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33164 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33165 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33166 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33167 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33168 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33170 /* AVX512IFMA */
33171 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33172 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33173 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33174 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33175 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33176 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33177 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33178 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33179 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33180 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33181 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33182 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33184 /* AVX512VBMI */
33185 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33186 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33187 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33188 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33189 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33190 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33191 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33192 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33193 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33194 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33195 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33196 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33197 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33198 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33199 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33200 };
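/* Note on the table layout: each struct builtin_description entry gives,
   roughly, the ISA option mask that must be enabled, the insn pattern used
   to expand the call (CODE_FOR_*), the builtin's name, its IX86_BUILTIN_*
   enumerator, an rtx comparison code (UNKNOWN where none applies) and the
   function prototype enumerator cast to int.  As the _FTYPE_ suffixes
   above show, the "_mask" builtins additionally take a merge source of the
   result type and a QI/HI/SI/DI mask whose width matches the vector
   element count.  */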
33202 /* Builtins with rounding support. */
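/* A rough illustration (user code including the intrinsics headers, not
   anything in this file): for these rounding-form builtins the trailing
   INT argument carries the embedded rounding / suppress-all-exceptions
   control, e.g.

     __m512d r = (__m512d) __builtin_ia32_addpd512_mask
                   ((__v8df) a, (__v8df) b, (__v8df) src, m,
                    _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);

   with a, b and src of type __m512d and m an __mmask8; the _MM_FROUND_*
   constants come from the SSE4.1/AVX-512 intrinsics headers.  */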
33203 static const struct builtin_description bdesc_round_args[] =
33204 {
33205 /* AVX512F */
33206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33225 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33227 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33234 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33236 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33286 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33288 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33290 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33292 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33294 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33296 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33298 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33300 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33326 /* AVX512ER */
33327 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33328 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33329 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33330 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33331 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33332 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33333 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33334 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33335 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33336 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33338 /* AVX512DQ. */
33339 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33340 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33341 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33342 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33343 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33344 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33345 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33346 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33347 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33348 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33349 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33350 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33351 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33352 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33353 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33354 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33355 };
33357 /* Builtins for MPX. */
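/* Unlike the tables above, the MPX descriptors below carry no insn
   pattern (the icode field is (enum insn_code) 0); they are expanded by
   dedicated code rather than through the generic icode-driven
   expanders.  */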
33358 static const struct builtin_description bdesc_mpx[] =
33359 {
33360 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33361 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33362 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33363 };
33365 /* Const builtins for MPX. */
33366 static const struct builtin_description bdesc_mpx_const[] =
33367 {
33368 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33369 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33370 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33371 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33372 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33373 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33374 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33375 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33376 };
33378 /* FMA4 and XOP. */
33379 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33380 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33381 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33382 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33383 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33384 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33385 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33386 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33387 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33388 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33389 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33390 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33391 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33392 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33393 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33394 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33395 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33396 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33397 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33398 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33399 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33400 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33401 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33402 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33403 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33404 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33405 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33406 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33407 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33408 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33409 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33410 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33411 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33412 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33413 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33414 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33415 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33416 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33417 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33418 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33419 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33420 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33421 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33422 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33423 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33424 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33425 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33426 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33427 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33428 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33429 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33430 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33432 static const struct builtin_description bdesc_multi_arg[] =
33434 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33435 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33436 UNKNOWN, (int)MULTI_ARG_3_SF },
33437 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33438 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33439 UNKNOWN, (int)MULTI_ARG_3_DF },
33441 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33442 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33443 UNKNOWN, (int)MULTI_ARG_3_SF },
33444 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33445 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33446 UNKNOWN, (int)MULTI_ARG_3_DF },
33448 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33449 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33450 UNKNOWN, (int)MULTI_ARG_3_SF },
33451 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33452 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33453 UNKNOWN, (int)MULTI_ARG_3_DF },
33454 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33455 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33456 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33457 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33458 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33459 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33461 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33462 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33463 UNKNOWN, (int)MULTI_ARG_3_SF },
33464 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33465 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33466 UNKNOWN, (int)MULTI_ARG_3_DF },
33467 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33468 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33469 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33470 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33471 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33472 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33634 /* TM vector builtins. */
33636 /* Reuse the existing x86-specific `struct builtin_description' because
33637 we're lazy. Add casts to make them fit. */
33638 static const struct builtin_description bdesc_tm[] =
33640 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33641 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33642 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33643 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33644 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33645 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33646 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33648 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33649 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33650 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33651 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33652 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33653 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33654 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33656 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33657 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33658 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33659 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33660 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33661 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33662 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33664 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33665 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33666 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33669 /* TM callbacks. */
33671 /* Return the builtin decl needed to load a vector of TYPE. */
33673 static tree
33674 ix86_builtin_tm_load (tree type)
33676 if (TREE_CODE (type) == VECTOR_TYPE)
33678 switch (tree_to_uhwi (TYPE_SIZE (type)))
33680 case 64:
33681 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33682 case 128:
33683 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33684 case 256:
33685 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33688 return NULL_TREE;
33691 /* Return the builtin decl needed to store a vector of TYPE. */
33693 static tree
33694 ix86_builtin_tm_store (tree type)
33696 if (TREE_CODE (type) == VECTOR_TYPE)
33698 switch (tree_to_uhwi (TYPE_SIZE (type)))
33700 case 64:
33701 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33702 case 128:
33703 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33704 case 256:
33705 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33708 return NULL_TREE;
33711 /* Initialize the transactional memory vector load/store builtins. */
33713 static void
33714 ix86_init_tm_builtins (void)
33716 enum ix86_builtin_func_type ftype;
33717 const struct builtin_description *d;
33718 size_t i;
33719 tree decl;
33720 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33721 tree attrs_log, attrs_type_log;
33723 if (!flag_tm)
33724 return;
33726 /* If there are no builtins defined, we must be compiling in a
33727 language without trans-mem support. */
33728 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33729 return;
33731 /* Use whatever attributes a normal TM load has. */
33732 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33733 attrs_load = DECL_ATTRIBUTES (decl);
33734 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33735 /* Use whatever attributes a normal TM store has. */
33736 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33737 attrs_store = DECL_ATTRIBUTES (decl);
33738 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33739 /* Use whatever attributes a normal TM log has. */
33740 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33741 attrs_log = DECL_ATTRIBUTES (decl);
33742 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33744 for (i = 0, d = bdesc_tm;
33745 i < ARRAY_SIZE (bdesc_tm);
33746 i++, d++)
33748 if ((d->mask & ix86_isa_flags) != 0
33749 || (lang_hooks.builtin_function
33750 == lang_hooks.builtin_function_ext_scope))
33752 tree type, attrs, attrs_type;
33753 enum built_in_function code = (enum built_in_function) d->code;
33755 ftype = (enum ix86_builtin_func_type) d->flag;
33756 type = ix86_get_builtin_func_type (ftype);
33758 if (BUILTIN_TM_LOAD_P (code))
33760 attrs = attrs_load;
33761 attrs_type = attrs_type_load;
33763 else if (BUILTIN_TM_STORE_P (code))
33765 attrs = attrs_store;
33766 attrs_type = attrs_type_store;
33768 else
33770 attrs = attrs_log;
33771 attrs_type = attrs_type_log;
33773 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33774 /* The name without the "__builtin_" prefix, for
33775 calling the builtin directly. */
33776 d->name + strlen ("__builtin_"),
33777 attrs);
33778 /* add_builtin_function () will set the DECL_ATTRIBUTES; now
33779 set the TYPE_ATTRIBUTES. */
33780 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33782 set_builtin_decl (code, decl, false);
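/* Editorial note (a worked illustration, not part of the original source):
   the "d->name + strlen ("__builtin_")" argument above simply skips the
   prefix, e.g.

       "__builtin__ITM_WM64" + strlen ("__builtin_")  ==>  "_ITM_WM64"

   i.e. the corresponding libitm entry point.  */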
33787 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33788 in the current target ISA, so that the user can compile particular modules
33789 with target-specific options that differ from the command-line
33790 options. */
33791 static void
33792 ix86_init_mmx_sse_builtins (void)
33794 const struct builtin_description * d;
33795 enum ix86_builtin_func_type ftype;
33796 size_t i;
33798 /* Add all special builtins with variable number of operands. */
33799 for (i = 0, d = bdesc_special_args;
33800 i < ARRAY_SIZE (bdesc_special_args);
33801 i++, d++)
33803 if (d->name == 0)
33804 continue;
33806 ftype = (enum ix86_builtin_func_type) d->flag;
33807 def_builtin (d->mask, d->name, ftype, d->code);
33810 /* Add all builtins with variable number of operands. */
33811 for (i = 0, d = bdesc_args;
33812 i < ARRAY_SIZE (bdesc_args);
33813 i++, d++)
33815 if (d->name == 0)
33816 continue;
33818 ftype = (enum ix86_builtin_func_type) d->flag;
33819 def_builtin_const (d->mask, d->name, ftype, d->code);
33822 /* Add all builtins with rounding. */
33823 for (i = 0, d = bdesc_round_args;
33824 i < ARRAY_SIZE (bdesc_round_args);
33825 i++, d++)
33827 if (d->name == 0)
33828 continue;
33830 ftype = (enum ix86_builtin_func_type) d->flag;
33831 def_builtin_const (d->mask, d->name, ftype, d->code);
33834 /* pcmpestr[im] insns. */
33835 for (i = 0, d = bdesc_pcmpestr;
33836 i < ARRAY_SIZE (bdesc_pcmpestr);
33837 i++, d++)
33839 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33840 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33841 else
33842 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33843 def_builtin_const (d->mask, d->name, ftype, d->code);
33846 /* pcmpistr[im] insns. */
33847 for (i = 0, d = bdesc_pcmpistr;
33848 i < ARRAY_SIZE (bdesc_pcmpistr);
33849 i++, d++)
33851 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33852 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33853 else
33854 ftype = INT_FTYPE_V16QI_V16QI_INT;
33855 def_builtin_const (d->mask, d->name, ftype, d->code);
33858 /* comi/ucomi insns. */
33859 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33861 if (d->mask == OPTION_MASK_ISA_SSE2)
33862 ftype = INT_FTYPE_V2DF_V2DF;
33863 else
33864 ftype = INT_FTYPE_V4SF_V4SF;
33865 def_builtin_const (d->mask, d->name, ftype, d->code);
33868 /* SSE */
33869 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33870 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33871 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33872 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33874 /* SSE or 3DNow!A */
33875 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33876 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33877 IX86_BUILTIN_MASKMOVQ);
33879 /* SSE2 */
33880 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33881 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33883 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33884 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33885 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33886 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33888 /* SSE3. */
33889 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33890 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33891 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33892 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33894 /* AES */
33895 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33896 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33897 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33898 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33899 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33900 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33901 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33902 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33903 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33904 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33905 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33906 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33908 /* PCLMUL */
33909 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33910 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33912 /* RDRND */
33913 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33914 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33915 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33916 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33917 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33918 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33919 IX86_BUILTIN_RDRAND64_STEP);
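/* Editorial example (a minimal usage sketch, not part of the original
   source; the function name hw_random_u32 is hypothetical): how the
   32-bit RDRAND step builtin registered above is typically used.  The
   builtin returns nonzero when the hardware delivered a random value.  */
#if 0
__attribute__ ((target ("rdrnd")))
static unsigned int
hw_random_u32 (void)
{
  unsigned int val = 0;
  /* Retry a few times in case the DRNG is momentarily exhausted.  */
  for (int i = 0; i < 10; i++)
    if (__builtin_ia32_rdrand32_step (&val))
      break;
  return val;
}
#endif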
33921 /* AVX2 */
33922 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33923 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33924 IX86_BUILTIN_GATHERSIV2DF);
33926 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33927 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33928 IX86_BUILTIN_GATHERSIV4DF);
33930 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33931 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33932 IX86_BUILTIN_GATHERDIV2DF);
33934 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33935 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33936 IX86_BUILTIN_GATHERDIV4DF);
33938 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33939 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33940 IX86_BUILTIN_GATHERSIV4SF);
33942 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33943 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33944 IX86_BUILTIN_GATHERSIV8SF);
33946 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33947 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33948 IX86_BUILTIN_GATHERDIV4SF);
33950 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33951 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33952 IX86_BUILTIN_GATHERDIV8SF);
33954 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33955 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33956 IX86_BUILTIN_GATHERSIV2DI);
33958 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33959 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33960 IX86_BUILTIN_GATHERSIV4DI);
33962 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33963 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33964 IX86_BUILTIN_GATHERDIV2DI);
33966 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33967 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33968 IX86_BUILTIN_GATHERDIV4DI);
33970 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33971 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33972 IX86_BUILTIN_GATHERSIV4SI);
33974 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33975 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33976 IX86_BUILTIN_GATHERSIV8SI);
33978 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33979 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33980 IX86_BUILTIN_GATHERDIV4SI);
33982 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33983 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33984 IX86_BUILTIN_GATHERDIV8SI);
33986 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33987 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33988 IX86_BUILTIN_GATHERALTSIV4DF);
33990 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33991 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33992 IX86_BUILTIN_GATHERALTDIV8SF);
33994 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33995 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33996 IX86_BUILTIN_GATHERALTSIV4DI);
33998 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33999 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
34000 IX86_BUILTIN_GATHERALTDIV8SI);
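/* Editorial example (an illustrative sketch, not part of the original
   source; gather_four is a hypothetical name): the AVX2 gather builtins
   registered above back the gather intrinsics in <immintrin.h>; for
   instance, _mm_i32gather_ps loads four floats from base + index * scale.  */
#if 0
#include <immintrin.h>

__attribute__ ((target ("avx2")))
static __m128
gather_four (const float *base, __m128i vindex)
{
  /* The scale argument must be a compile-time constant: 1, 2, 4 or 8.  */
  return _mm_i32gather_ps (base, vindex, 4);
}
#endif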
34002 /* AVX512F */
34003 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
34004 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
34005 IX86_BUILTIN_GATHER3SIV16SF);
34007 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
34008 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
34009 IX86_BUILTIN_GATHER3SIV8DF);
34011 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
34012 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
34013 IX86_BUILTIN_GATHER3DIV16SF);
34015 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
34016 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
34017 IX86_BUILTIN_GATHER3DIV8DF);
34019 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
34020 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
34021 IX86_BUILTIN_GATHER3SIV16SI);
34023 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
34024 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
34025 IX86_BUILTIN_GATHER3SIV8DI);
34027 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
34028 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
34029 IX86_BUILTIN_GATHER3DIV16SI);
34031 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
34032 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
34033 IX86_BUILTIN_GATHER3DIV8DI);
34035 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
34036 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
34037 IX86_BUILTIN_GATHER3ALTSIV8DF);
34039 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
34040 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
34041 IX86_BUILTIN_GATHER3ALTDIV16SF);
34043 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
34044 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
34045 IX86_BUILTIN_GATHER3ALTSIV8DI);
34047 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
34048 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
34049 IX86_BUILTIN_GATHER3ALTDIV16SI);
34051 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
34052 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
34053 IX86_BUILTIN_SCATTERSIV16SF);
34055 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
34056 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
34057 IX86_BUILTIN_SCATTERSIV8DF);
34059 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
34060 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
34061 IX86_BUILTIN_SCATTERDIV16SF);
34063 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
34064 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
34065 IX86_BUILTIN_SCATTERDIV8DF);
34067 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
34068 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
34069 IX86_BUILTIN_SCATTERSIV16SI);
34071 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
34072 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
34073 IX86_BUILTIN_SCATTERSIV8DI);
34075 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
34076 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
34077 IX86_BUILTIN_SCATTERDIV16SI);
34079 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
34080 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
34081 IX86_BUILTIN_SCATTERDIV8DI);
34083 /* AVX512VL */
34084 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
34085 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
34086 IX86_BUILTIN_GATHER3SIV2DF);
34088 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
34089 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
34090 IX86_BUILTIN_GATHER3SIV4DF);
34092 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
34093 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
34094 IX86_BUILTIN_GATHER3DIV2DF);
34096 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
34097 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
34098 IX86_BUILTIN_GATHER3DIV4DF);
34100 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
34101 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
34102 IX86_BUILTIN_GATHER3SIV4SF);
34104 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
34105 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
34106 IX86_BUILTIN_GATHER3SIV8SF);
34108 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
34109 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
34110 IX86_BUILTIN_GATHER3DIV4SF);
34112 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
34113 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
34114 IX86_BUILTIN_GATHER3DIV8SF);
34116 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
34117 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
34118 IX86_BUILTIN_GATHER3SIV2DI);
34120 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
34121 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
34122 IX86_BUILTIN_GATHER3SIV4DI);
34124 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
34125 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
34126 IX86_BUILTIN_GATHER3DIV2DI);
34128 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
34129 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
34130 IX86_BUILTIN_GATHER3DIV4DI);
34132 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
34133 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
34134 IX86_BUILTIN_GATHER3SIV4SI);
34136 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
34137 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
34138 IX86_BUILTIN_GATHER3SIV8SI);
34140 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
34141 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
34142 IX86_BUILTIN_GATHER3DIV4SI);
34144 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
34145 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
34146 IX86_BUILTIN_GATHER3DIV8SI);
34148 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
34149 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
34150 IX86_BUILTIN_GATHER3ALTSIV4DF);
34152 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
34153 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
34154 IX86_BUILTIN_GATHER3ALTDIV8SF);
34156 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
34157 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
34158 IX86_BUILTIN_GATHER3ALTSIV4DI);
34160 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
34161 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
34162 IX86_BUILTIN_GATHER3ALTDIV8SI);
34164 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34165 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34166 IX86_BUILTIN_SCATTERSIV8SF);
34168 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34169 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34170 IX86_BUILTIN_SCATTERSIV4SF);
34172 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34173 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34174 IX86_BUILTIN_SCATTERSIV4DF);
34176 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34177 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34178 IX86_BUILTIN_SCATTERSIV2DF);
34180 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34181 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34182 IX86_BUILTIN_SCATTERDIV8SF);
34184 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34185 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34186 IX86_BUILTIN_SCATTERDIV4SF);
34188 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34189 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34190 IX86_BUILTIN_SCATTERDIV4DF);
34192 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34193 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34194 IX86_BUILTIN_SCATTERDIV2DF);
34196 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34197 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34198 IX86_BUILTIN_SCATTERSIV8SI);
34200 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34201 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34202 IX86_BUILTIN_SCATTERSIV4SI);
34204 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34205 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34206 IX86_BUILTIN_SCATTERSIV4DI);
34208 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34209 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34210 IX86_BUILTIN_SCATTERSIV2DI);
34212 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34213 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34214 IX86_BUILTIN_SCATTERDIV8SI);
34216 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34217 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34218 IX86_BUILTIN_SCATTERDIV4SI);
34220 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34221 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34222 IX86_BUILTIN_SCATTERDIV4DI);
34224 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34225 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34226 IX86_BUILTIN_SCATTERDIV2DI);
34228 /* AVX512PF */
34229 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34230 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34231 IX86_BUILTIN_GATHERPFDPD);
34232 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34233 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34234 IX86_BUILTIN_GATHERPFDPS);
34235 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34236 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34237 IX86_BUILTIN_GATHERPFQPD);
34238 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34239 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34240 IX86_BUILTIN_GATHERPFQPS);
34241 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34242 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34243 IX86_BUILTIN_SCATTERPFDPD);
34244 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34245 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34246 IX86_BUILTIN_SCATTERPFDPS);
34247 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34248 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34249 IX86_BUILTIN_SCATTERPFQPD);
34250 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34251 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34252 IX86_BUILTIN_SCATTERPFQPS);
34254 /* SHA */
34255 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34256 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34257 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34258 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34259 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34260 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34261 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34262 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34263 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34264 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34265 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34266 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34267 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34268 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34270 /* RTM. */
34271 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34272 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34274 /* MMX access to the vec_init patterns. */
34275 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34276 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34278 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34279 V4HI_FTYPE_HI_HI_HI_HI,
34280 IX86_BUILTIN_VEC_INIT_V4HI);
34282 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34283 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34284 IX86_BUILTIN_VEC_INIT_V8QI);
34286 /* Access to the vec_extract patterns. */
34287 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34288 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34289 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34290 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34291 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34292 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34293 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34294 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34295 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34296 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34298 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34299 "__builtin_ia32_vec_ext_v4hi",
34300 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34302 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34303 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34305 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34306 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34308 /* Access to the vec_set patterns. */
34309 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34310 "__builtin_ia32_vec_set_v2di",
34311 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34313 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34314 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34316 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34317 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34319 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34320 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34322 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34323 "__builtin_ia32_vec_set_v4hi",
34324 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34326 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34327 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34329 /* RDSEED */
34330 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34331 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34332 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34333 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34334 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34335 "__builtin_ia32_rdseed_di_step",
34336 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34338 /* ADCX */
34339 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34340 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34341 def_builtin (OPTION_MASK_ISA_64BIT,
34342 "__builtin_ia32_addcarryx_u64",
34343 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34344 IX86_BUILTIN_ADDCARRYX64);
34346 /* SBB */
34347 def_builtin (0, "__builtin_ia32_sbb_u32",
34348 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34349 def_builtin (OPTION_MASK_ISA_64BIT,
34350 "__builtin_ia32_sbb_u64",
34351 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34352 IX86_BUILTIN_SBB64);
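/* Editorial example (a minimal sketch, not part of the original source;
   add64_as_two_32s is a hypothetical name): the add-with-carry builtin
   registered above chains 32-bit additions, storing each sum through the
   pointer and returning the carry-out.  */
#if 0
static unsigned char
add64_as_two_32s (unsigned int a_lo, unsigned int a_hi,
		  unsigned int b_lo, unsigned int b_hi,
		  unsigned int *r_lo, unsigned int *r_hi)
{
  unsigned char c = __builtin_ia32_addcarryx_u32 (0, a_lo, b_lo, r_lo);
  return __builtin_ia32_addcarryx_u32 (c, a_hi, b_hi, r_hi);
}
#endif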
34354 /* Read/write FLAGS. */
34355 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34356 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34357 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34358 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34359 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34360 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34361 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34362 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
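/* Editorial example (not part of the original source;
   save_and_restore_flags is a hypothetical name): on a 32-bit target the
   EFLAGS builtins registered above read and restore the flags register.  */
#if 0
static void
save_and_restore_flags (void)
{
  unsigned int flags = __builtin_ia32_readeflags_u32 ();
  __builtin_ia32_writeeflags_u32 (flags);
}
#endif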
34364 /* CLFLUSHOPT. */
34365 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34366 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34368 /* CLWB. */
34369 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34370 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34372 /* MONITORX and MWAITX. */
34373 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34374 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34375 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34376 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
34378 /* Add FMA4 and XOP multi-arg instructions. */
34379 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34381 if (d->name == 0)
34382 continue;
34384 ftype = (enum ix86_builtin_func_type) d->flag;
34385 def_builtin_const (d->mask, d->name, ftype, d->code);
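/* Editorial example (an illustrative sketch, not part of the original
   source; widen_add is a hypothetical name): the reason the tables above
   are walked even for ISAs outside the current target, as noted at the
   top of ix86_init_mmx_sse_builtins, is that a single function may opt
   into a wider ISA via the target attribute while the rest of the
   translation unit is compiled without it.  */
#if 0
#include <immintrin.h>

__attribute__ ((target ("avx2")))
static __m256i
widen_add (__m256i a, __m256i b)
{
  /* Uses an AVX2 builtin even if this file is compiled without -mavx2.  */
  return _mm256_add_epi32 (a, b);
}
#endif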
34389 static void
34390 ix86_init_mpx_builtins ()
34392 const struct builtin_description * d;
34393 enum ix86_builtin_func_type ftype;
34394 tree decl;
34395 size_t i;
34397 for (i = 0, d = bdesc_mpx;
34398 i < ARRAY_SIZE (bdesc_mpx);
34399 i++, d++)
34401 if (d->name == 0)
34402 continue;
34404 ftype = (enum ix86_builtin_func_type) d->flag;
34405 decl = def_builtin (d->mask, d->name, ftype, d->code);
34407 /* Without the leaf and nothrow flags for MPX builtins,
34408 abnormal edges may follow their calls when setjmp is
34409 present in the function. Since we may have a lot
34410 of MPX builtin calls, this causes lots of useless
34411 edges and enormous PHI nodes. To avoid this we mark
34412 MPX builtins as leaf and nothrow. */
34413 if (decl)
34415 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34416 NULL_TREE);
34417 TREE_NOTHROW (decl) = 1;
34419 else
34421 ix86_builtins_isa[(int)d->code].leaf_p = true;
34422 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34426 for (i = 0, d = bdesc_mpx_const;
34427 i < ARRAY_SIZE (bdesc_mpx_const);
34428 i++, d++)
34430 if (d->name == 0)
34431 continue;
34433 ftype = (enum ix86_builtin_func_type) d->flag;
34434 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34436 if (decl)
34438 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34439 NULL_TREE);
34440 TREE_NOTHROW (decl) = 1;
34442 else
34444 ix86_builtins_isa[(int)d->code].leaf_p = true;
34445 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34450 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34451 to return a pointer to VERSION_DECL if the outcome of the expression
34452 formed by PREDICATE_CHAIN is true. This function will be called during
34453 version dispatch to decide which function version to execute. It returns
34454 the basic block at the end, to which more conditions can be added. */
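/* Illustrative sketch only (not generated output): for a version V guarded
   by two predicates, the statements built below correspond roughly to

     c1 = __builtin_cpu_is ("arch-name");
     c2 = __builtin_cpu_supports ("feature");
     c  = MIN_EXPR <c2, c1>;
     if (c > 0)
       return (void *) &V;
     (fall through to the conditions added by later calls)

   so the block returned is the false arm of the IF.  */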
34456 static basic_block
34457 add_condition_to_bb (tree function_decl, tree version_decl,
34458 tree predicate_chain, basic_block new_bb)
34460 gimple return_stmt;
34461 tree convert_expr, result_var;
34462 gimple convert_stmt;
34463 gimple call_cond_stmt;
34464 gimple if_else_stmt;
34466 basic_block bb1, bb2, bb3;
34467 edge e12, e23;
34469 tree cond_var, and_expr_var = NULL_TREE;
34470 gimple_seq gseq;
34472 tree predicate_decl, predicate_arg;
34474 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34476 gcc_assert (new_bb != NULL);
34477 gseq = bb_seq (new_bb);
34480 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34481 build_fold_addr_expr (version_decl));
34482 result_var = create_tmp_var (ptr_type_node);
34483 convert_stmt = gimple_build_assign (result_var, convert_expr);
34484 return_stmt = gimple_build_return (result_var);
34486 if (predicate_chain == NULL_TREE)
34488 gimple_seq_add_stmt (&gseq, convert_stmt);
34489 gimple_seq_add_stmt (&gseq, return_stmt);
34490 set_bb_seq (new_bb, gseq);
34491 gimple_set_bb (convert_stmt, new_bb);
34492 gimple_set_bb (return_stmt, new_bb);
34493 pop_cfun ();
34494 return new_bb;
34497 while (predicate_chain != NULL)
34499 cond_var = create_tmp_var (integer_type_node);
34500 predicate_decl = TREE_PURPOSE (predicate_chain);
34501 predicate_arg = TREE_VALUE (predicate_chain);
34502 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34503 gimple_call_set_lhs (call_cond_stmt, cond_var);
34505 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34506 gimple_set_bb (call_cond_stmt, new_bb);
34507 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34509 predicate_chain = TREE_CHAIN (predicate_chain);
34511 if (and_expr_var == NULL)
34512 and_expr_var = cond_var;
34513 else
34515 gimple assign_stmt;
34516 /* Use MIN_EXPR to check whether any integer is zero:
34517 and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
34518 assign_stmt = gimple_build_assign (and_expr_var,
34519 build2 (MIN_EXPR, integer_type_node,
34520 cond_var, and_expr_var));
34522 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34523 gimple_set_bb (assign_stmt, new_bb);
34524 gimple_seq_add_stmt (&gseq, assign_stmt);
34528 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34529 integer_zero_node,
34530 NULL_TREE, NULL_TREE);
34531 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34532 gimple_set_bb (if_else_stmt, new_bb);
34533 gimple_seq_add_stmt (&gseq, if_else_stmt);
34535 gimple_seq_add_stmt (&gseq, convert_stmt);
34536 gimple_seq_add_stmt (&gseq, return_stmt);
34537 set_bb_seq (new_bb, gseq);
34539 bb1 = new_bb;
34540 e12 = split_block (bb1, if_else_stmt);
34541 bb2 = e12->dest;
34542 e12->flags &= ~EDGE_FALLTHRU;
34543 e12->flags |= EDGE_TRUE_VALUE;
34545 e23 = split_block (bb2, return_stmt);
34547 gimple_set_bb (convert_stmt, bb2);
34548 gimple_set_bb (return_stmt, bb2);
34550 bb3 = e23->dest;
34551 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34553 remove_edge (e23);
34554 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34556 pop_cfun ();
34558 return bb3;
34561 /* This parses the attribute arguments to target in DECL and determines
34562 the right builtin to use to match the platform specification.
34563 It returns the priority value for this version decl. If PREDICATE_LIST
34564 is not NULL, it stores the list of cpu features that need to be checked
34565 before dispatching this function. */
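/* For example (an illustration, not exhaustive): a decl carrying
   __attribute__ ((target ("arch=core2"))) yields the predicate
   __builtin_cpu_is ("core2") and a P_PROC_* priority, while
   __attribute__ ((target ("sse4.2"))) yields the predicate
   __builtin_cpu_supports ("sse4.2") and priority P_SSE4_2.  */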
34567 static unsigned int
34568 get_builtin_code_for_version (tree decl, tree *predicate_list)
34570 tree attrs;
34571 struct cl_target_option cur_target;
34572 tree target_node;
34573 struct cl_target_option *new_target;
34574 const char *arg_str = NULL;
34575 const char *attrs_str = NULL;
34576 char *tok_str = NULL;
34577 char *token;
34579 /* Priority of i386 features; a greater value means higher priority. This is
34580 used to decide the order in which function dispatch must happen. For
34581 instance, a version specialized for SSE4.2 should be checked for dispatch
34582 before a version for SSE3, as SSE4.2 implies SSE3. */
34583 enum feature_priority
34585 P_ZERO = 0,
34586 P_MMX,
34587 P_SSE,
34588 P_SSE2,
34589 P_SSE3,
34590 P_SSSE3,
34591 P_PROC_SSSE3,
34592 P_SSE4_A,
34593 P_PROC_SSE4_A,
34594 P_SSE4_1,
34595 P_SSE4_2,
34596 P_PROC_SSE4_2,
34597 P_POPCNT,
34598 P_AVX,
34599 P_PROC_AVX,
34600 P_BMI,
34601 P_PROC_BMI,
34602 P_FMA4,
34603 P_XOP,
34604 P_PROC_XOP,
34605 P_FMA,
34606 P_PROC_FMA,
34607 P_BMI2,
34608 P_AVX2,
34609 P_PROC_AVX2,
34610 P_AVX512F,
34611 P_PROC_AVX512F
34614 enum feature_priority priority = P_ZERO;
34616 /* These are the target attribute strings for which a dispatcher is
34617 available, from fold_builtin_cpu. */
34619 static struct _feature_list
34621 const char *const name;
34622 const enum feature_priority priority;
34624 const feature_list[] =
34626 {"mmx", P_MMX},
34627 {"sse", P_SSE},
34628 {"sse2", P_SSE2},
34629 {"sse3", P_SSE3},
34630 {"sse4a", P_SSE4_A},
34631 {"ssse3", P_SSSE3},
34632 {"sse4.1", P_SSE4_1},
34633 {"sse4.2", P_SSE4_2},
34634 {"popcnt", P_POPCNT},
34635 {"avx", P_AVX},
34636 {"bmi", P_BMI},
34637 {"fma4", P_FMA4},
34638 {"xop", P_XOP},
34639 {"fma", P_FMA},
34640 {"bmi2", P_BMI2},
34641 {"avx2", P_AVX2},
34642 {"avx512f", P_AVX512F}
34646 static unsigned int NUM_FEATURES
34647 = sizeof (feature_list) / sizeof (struct _feature_list);
34649 unsigned int i;
34651 tree predicate_chain = NULL_TREE;
34652 tree predicate_decl, predicate_arg;
34654 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34655 gcc_assert (attrs != NULL);
34657 attrs = TREE_VALUE (TREE_VALUE (attrs));
34659 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34660 attrs_str = TREE_STRING_POINTER (attrs);
34662 /* Return priority zero for the default function. */
34663 if (strcmp (attrs_str, "default") == 0)
34664 return 0;
34666 /* Handle arch= if specified. For priority, set it to be 1 more than
34667 the best instruction set the processor can handle. For instance, if
34668 there is a version for atom and a version for ssse3 (the highest ISA
34669 priority for atom), the atom version must be checked for dispatch
34670 before the ssse3 version. */
34671 if (strstr (attrs_str, "arch=") != NULL)
34673 cl_target_option_save (&cur_target, &global_options);
34674 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34675 &global_options_set);
34677 gcc_assert (target_node);
34678 new_target = TREE_TARGET_OPTION (target_node);
34679 gcc_assert (new_target);
34681 if (new_target->arch_specified && new_target->arch > 0)
34683 switch (new_target->arch)
34685 case PROCESSOR_CORE2:
34686 arg_str = "core2";
34687 priority = P_PROC_SSSE3;
34688 break;
34689 case PROCESSOR_NEHALEM:
34690 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34691 arg_str = "westmere";
34692 else
34693 /* Translate "arch=corei7" and "arch=nehalem" to
34694 "corei7" so that they map to M_INTEL_COREI7 as the
34695 cpu type, covering all M_INTEL_COREI7_XXX subtypes. */
34696 arg_str = "corei7";
34697 priority = P_PROC_SSE4_2;
34698 break;
34699 case PROCESSOR_SANDYBRIDGE:
34700 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34701 arg_str = "ivybridge";
34702 else
34703 arg_str = "sandybridge";
34704 priority = P_PROC_AVX;
34705 break;
34706 case PROCESSOR_HASWELL:
34707 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34708 arg_str = "broadwell";
34709 else
34710 arg_str = "haswell";
34711 priority = P_PROC_AVX2;
34712 break;
34713 case PROCESSOR_BONNELL:
34714 arg_str = "bonnell";
34715 priority = P_PROC_SSSE3;
34716 break;
34717 case PROCESSOR_KNL:
34718 arg_str = "knl";
34719 priority = P_PROC_AVX512F;
34720 break;
34721 case PROCESSOR_SILVERMONT:
34722 arg_str = "silvermont";
34723 priority = P_PROC_SSE4_2;
34724 break;
34725 case PROCESSOR_AMDFAM10:
34726 arg_str = "amdfam10h";
34727 priority = P_PROC_SSE4_A;
34728 break;
34729 case PROCESSOR_BTVER1:
34730 arg_str = "btver1";
34731 priority = P_PROC_SSE4_A;
34732 break;
34733 case PROCESSOR_BTVER2:
34734 arg_str = "btver2";
34735 priority = P_PROC_BMI;
34736 break;
34737 case PROCESSOR_BDVER1:
34738 arg_str = "bdver1";
34739 priority = P_PROC_XOP;
34740 break;
34741 case PROCESSOR_BDVER2:
34742 arg_str = "bdver2";
34743 priority = P_PROC_FMA;
34744 break;
34745 case PROCESSOR_BDVER3:
34746 arg_str = "bdver3";
34747 priority = P_PROC_FMA;
34748 break;
34749 case PROCESSOR_BDVER4:
34750 arg_str = "bdver4";
34751 priority = P_PROC_AVX2;
34752 break;
34756 cl_target_option_restore (&global_options, &cur_target);
34758 if (predicate_list && arg_str == NULL)
34760 error_at (DECL_SOURCE_LOCATION (decl),
34761 "No dispatcher found for the versioning attributes");
34762 return 0;
34765 if (predicate_list)
34767 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34768 /* For a C string literal the length includes the trailing NULL. */
34769 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34770 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34771 predicate_chain);
34775 /* Process feature name. */
34776 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34777 strcpy (tok_str, attrs_str);
34778 token = strtok (tok_str, ",");
34779 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34781 while (token != NULL)
34783 /* Do not process "arch=" */
34784 if (strncmp (token, "arch=", 5) == 0)
34786 token = strtok (NULL, ",");
34787 continue;
34789 for (i = 0; i < NUM_FEATURES; ++i)
34791 if (strcmp (token, feature_list[i].name) == 0)
34793 if (predicate_list)
34795 predicate_arg = build_string_literal (
34796 strlen (feature_list[i].name) + 1,
34797 feature_list[i].name);
34798 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34799 predicate_chain);
34801 /* Find the maximum priority feature. */
34802 if (feature_list[i].priority > priority)
34803 priority = feature_list[i].priority;
34805 break;
34808 if (predicate_list && i == NUM_FEATURES)
34810 error_at (DECL_SOURCE_LOCATION (decl),
34811 "No dispatcher found for %s", token);
34812 return 0;
34814 token = strtok (NULL, ",");
34816 free (tok_str);
34818 if (predicate_list && predicate_chain == NULL_TREE)
34820 error_at (DECL_SOURCE_LOCATION (decl),
34821 "No dispatcher found for the versioning attributes : %s",
34822 attrs_str);
34823 return 0;
34825 else if (predicate_list)
34827 predicate_chain = nreverse (predicate_chain);
34828 *predicate_list = predicate_chain;
34831 return priority;
34834 /* This compares the priority of target features in function DECL1
34835 and DECL2. It returns positive value if DECL1 is higher priority,
34836 negative value if DECL2 is higher priority and 0 if they are the
34837 same. */
34839 static int
34840 ix86_compare_version_priority (tree decl1, tree decl2)
34842 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34843 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34845 return (int)priority1 - (int)priority2;
34848 /* V1 and V2 point to function versions with different priorities
34849 based on the target ISA. This function compares their priorities. */
34851 static int
34852 feature_compare (const void *v1, const void *v2)
34854 typedef struct _function_version_info
34856 tree version_decl;
34857 tree predicate_chain;
34858 unsigned int dispatch_priority;
34859 } function_version_info;
34861 const function_version_info c1 = *(const function_version_info *)v1;
34862 const function_version_info c2 = *(const function_version_info *)v2;
34863 return (c2.dispatch_priority - c1.dispatch_priority);
34866 /* This function generates the dispatch function for
34867 multi-versioned functions. DISPATCH_DECL is the function which will
34868 contain the dispatch logic. FNDECLS are the function choices for
34869 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34870 in DISPATCH_DECL in which the dispatch code is generated. */
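/* Rough sketch (illustrative only) of the resolver body this builds, for
   versions V1 .. Vn sorted by descending dispatch priority and default D:

     __builtin_cpu_init ();
     if (<predicates for V1>) return &V1;
     ...
     if (<predicates for Vn>) return &Vn;
     return &D;  */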
34872 static int
34873 dispatch_function_versions (tree dispatch_decl,
34874 void *fndecls_p,
34875 basic_block *empty_bb)
34877 tree default_decl;
34878 gimple ifunc_cpu_init_stmt;
34879 gimple_seq gseq;
34880 int ix;
34881 tree ele;
34882 vec<tree> *fndecls;
34883 unsigned int num_versions = 0;
34884 unsigned int actual_versions = 0;
34885 unsigned int i;
34887 struct _function_version_info
34889 tree version_decl;
34890 tree predicate_chain;
34891 unsigned int dispatch_priority;
34892 }*function_version_info;
34894 gcc_assert (dispatch_decl != NULL
34895 && fndecls_p != NULL
34896 && empty_bb != NULL);
34898 /* fndecls_p is actually a vector. */
34899 fndecls = static_cast<vec<tree> *> (fndecls_p);
34901 /* At least one more version other than the default. */
34902 num_versions = fndecls->length ();
34903 gcc_assert (num_versions >= 2);
34905 function_version_info = (struct _function_version_info *)
34906 XNEWVEC (struct _function_version_info, (num_versions - 1));
34908 /* The first version in the vector is the default decl. */
34909 default_decl = (*fndecls)[0];
34911 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34913 gseq = bb_seq (*empty_bb);
34914 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34915 constructors, so explicitly call __builtin_cpu_init here. */
34916 ifunc_cpu_init_stmt = gimple_build_call_vec (
34917 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34918 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34919 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34920 set_bb_seq (*empty_bb, gseq);
34922 pop_cfun ();
34925 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34927 tree version_decl = ele;
34928 tree predicate_chain = NULL_TREE;
34929 unsigned int priority;
34930 /* Get attribute string, parse it and find the right predicate decl.
34931 The predicate function could be a lengthy combination of many
34932 features, like arch-type and various isa-variants. */
34933 priority = get_builtin_code_for_version (version_decl,
34934 &predicate_chain);
34936 if (predicate_chain == NULL_TREE)
34937 continue;
34939 function_version_info [actual_versions].version_decl = version_decl;
34940 function_version_info [actual_versions].predicate_chain
34941 = predicate_chain;
34942 function_version_info [actual_versions].dispatch_priority = priority;
34943 actual_versions++;
34946 /* Sort the versions according to descending order of dispatch priority. The
34947 priority is based on the ISA. This is not a perfect solution. There
34948 could still be ambiguity. If more than one function version is suitable
34949 to execute, which one should be dispatched? In future, allow the user
34950 to specify a dispatch priority next to the version. */
34951 qsort (function_version_info, actual_versions,
34952 sizeof (struct _function_version_info), feature_compare);
34954 for (i = 0; i < actual_versions; ++i)
34955 *empty_bb = add_condition_to_bb (dispatch_decl,
34956 function_version_info[i].version_decl,
34957 function_version_info[i].predicate_chain,
34958 *empty_bb);
34960 /* Dispatch the default version at the end. */
34961 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34962 NULL, *empty_bb);
34964 free (function_version_info);
34965 return 0;
34968 /* Comparator function to be used in qsort routine to sort attribute
34969 specification strings to "target". */
34971 static int
34972 attr_strcmp (const void *v1, const void *v2)
34974 const char *c1 = *(char *const*)v1;
34975 const char *c2 = *(char *const*)v2;
34976 return strcmp (c1, c2);
34979 /* ARGLIST is the argument to target attribute. This function tokenizes
34980 the comma separated arguments, sorts them and returns a string which
34981 is a unique identifier for the comma separated arguments. It also
34982 replaces non-identifier characters "=,-" with "_". */
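/* Hand-checked example (illustrative): the arguments of
   __attribute__ ((target ("avx,arch=core2"))) are joined into
   "avx,arch=core2", '=' becomes '_', the tokens sort as "arch_core2"
   before "avx", and the returned string is "arch_core2_avx".  */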
34984 static char *
34985 sorted_attr_string (tree arglist)
34987 tree arg;
34988 size_t str_len_sum = 0;
34989 char **args = NULL;
34990 char *attr_str, *ret_str;
34991 char *attr = NULL;
34992 unsigned int argnum = 1;
34993 unsigned int i;
34995 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34997 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34998 size_t len = strlen (str);
34999 str_len_sum += len + 1;
35000 if (arg != arglist)
35001 argnum++;
35002 for (i = 0; i < strlen (str); i++)
35003 if (str[i] == ',')
35004 argnum++;
35007 attr_str = XNEWVEC (char, str_len_sum);
35008 str_len_sum = 0;
35009 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35011 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35012 size_t len = strlen (str);
35013 memcpy (attr_str + str_len_sum, str, len);
35014 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
35015 str_len_sum += len + 1;
35018 /* Replace "=,-" with "_". */
35019 for (i = 0; i < strlen (attr_str); i++)
35020 if (attr_str[i] == '=' || attr_str[i]== '-')
35021 attr_str[i] = '_';
35023 if (argnum == 1)
35024 return attr_str;
35026 args = XNEWVEC (char *, argnum);
35028 i = 0;
35029 attr = strtok (attr_str, ",");
35030 while (attr != NULL)
35032 args[i] = attr;
35033 i++;
35034 attr = strtok (NULL, ",");
35037 qsort (args, argnum, sizeof (char *), attr_strcmp);
35039 ret_str = XNEWVEC (char, str_len_sum);
35040 str_len_sum = 0;
35041 for (i = 0; i < argnum; i++)
35043 size_t len = strlen (args[i]);
35044 memcpy (ret_str + str_len_sum, args[i], len);
35045 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
35046 str_len_sum += len + 1;
35049 XDELETEVEC (args);
35050 XDELETEVEC (attr_str);
35051 return ret_str;
35054 /* This function changes the assembler name for functions that are
35055 versions. If DECL is a function version and has a "target"
35056 attribute, it appends the attribute string to its assembler name. */
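/* For instance (illustrative, using a hypothetical function foo): the
   version declared with __attribute__ ((target ("arch=core2"))) is renamed
   "foo.arch_core2", while the version marked "default" keeps the plain
   assembler name "foo".  */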
35058 static tree
35059 ix86_mangle_function_version_assembler_name (tree decl, tree id)
35061 tree version_attr;
35062 const char *orig_name, *version_string;
35063 char *attr_str, *assembler_name;
35065 if (DECL_DECLARED_INLINE_P (decl)
35066 && lookup_attribute ("gnu_inline",
35067 DECL_ATTRIBUTES (decl)))
35068 error_at (DECL_SOURCE_LOCATION (decl),
35069 "Function versions cannot be marked as gnu_inline,"
35070 " bodies have to be generated");
35072 if (DECL_VIRTUAL_P (decl)
35073 || DECL_VINDEX (decl))
35074 sorry ("Virtual function multiversioning not supported");
35076 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35078 /* target attribute string cannot be NULL. */
35079 gcc_assert (version_attr != NULL_TREE);
35081 orig_name = IDENTIFIER_POINTER (id);
35082 version_string
35083 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
35085 if (strcmp (version_string, "default") == 0)
35086 return id;
35088 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
35089 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
35091 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
35093 /* Allow assembler name to be modified if already set. */
35094 if (DECL_ASSEMBLER_NAME_SET_P (decl))
35095 SET_DECL_RTL (decl, NULL);
35097 tree ret = get_identifier (assembler_name);
35098 XDELETEVEC (attr_str);
35099 XDELETEVEC (assembler_name);
35100 return ret;
35103 /* This function returns true if FN1 and FN2 are versions of the same function,
35104 that is, the target strings of the function decls are different. This assumes
35105 that FN1 and FN2 have the same signature. */
35107 static bool
35108 ix86_function_versions (tree fn1, tree fn2)
35110 tree attr1, attr2;
35111 char *target1, *target2;
35112 bool result;
35114 if (TREE_CODE (fn1) != FUNCTION_DECL
35115 || TREE_CODE (fn2) != FUNCTION_DECL)
35116 return false;
35118 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
35119 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
35121 /* At least one function decl should have the target attribute specified. */
35122 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
35123 return false;
35125 /* Diagnose missing target attribute if one of the decls is already
35126 multi-versioned. */
35127 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
35129 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
35131 if (attr2 != NULL_TREE)
35133 std::swap (fn1, fn2);
35134 attr1 = attr2;
35136 error_at (DECL_SOURCE_LOCATION (fn2),
35137 "missing %<target%> attribute for multi-versioned %D",
35138 fn2);
35139 inform (DECL_SOURCE_LOCATION (fn1),
35140 "previous declaration of %D", fn1);
35141 /* Prevent diagnosing of the same error multiple times. */
35142 DECL_ATTRIBUTES (fn2)
35143 = tree_cons (get_identifier ("target"),
35144 copy_node (TREE_VALUE (attr1)),
35145 DECL_ATTRIBUTES (fn2));
35147 return false;
35150 target1 = sorted_attr_string (TREE_VALUE (attr1));
35151 target2 = sorted_attr_string (TREE_VALUE (attr2));
35153 /* The sorted target strings must be different for fn1 and fn2
35154 to be versions. */
35155 if (strcmp (target1, target2) == 0)
35156 result = false;
35157 else
35158 result = true;
35160 XDELETEVEC (target1);
35161 XDELETEVEC (target2);
35163 return result;
35166 static tree
35167 ix86_mangle_decl_assembler_name (tree decl, tree id)
35169 /* For function version, add the target suffix to the assembler name. */
35170 if (TREE_CODE (decl) == FUNCTION_DECL
35171 && DECL_FUNCTION_VERSIONED (decl))
35172 id = ix86_mangle_function_version_assembler_name (decl, id);
35173 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35174 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35175 #endif
35177 return id;
35180 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35181 is true, append the full path name of the source file. */
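/* For example (illustrative, "foo" being a hypothetical decl name):
   make_name (foo_decl, "resolver", false) yields "foo.resolver"; with
   MAKE_UNIQUE true a file-scope unique string is spliced in, giving
   something of the form "foo.<unique>.resolver".  */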
35183 static char *
35184 make_name (tree decl, const char *suffix, bool make_unique)
35186 char *global_var_name;
35187 int name_len;
35188 const char *name;
35189 const char *unique_name = NULL;
35191 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35193 /* Get a unique name that can be used globally without any chances
35194 of collision at link time. */
35195 if (make_unique)
35196 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35198 name_len = strlen (name) + strlen (suffix) + 2;
35200 if (make_unique)
35201 name_len += strlen (unique_name) + 1;
35202 global_var_name = XNEWVEC (char, name_len);
35204 /* Use '.' to concatenate names as it is demangler friendly. */
35205 if (make_unique)
35206 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35207 suffix);
35208 else
35209 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35211 return global_var_name;
35214 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35216 /* Make a dispatcher declaration for the multi-versioned function DECL.
35217 Calls to DECL will be replaced with calls to the dispatcher
35218 by the front end.  Return the decl created. */
35220 static tree
35221 make_dispatcher_decl (const tree decl)
35223 tree func_decl;
35224 char *func_name;
35225 tree fn_type, func_type;
35226 bool is_uniq = false;
35228 if (TREE_PUBLIC (decl) == 0)
35229 is_uniq = true;
35231 func_name = make_name (decl, "ifunc", is_uniq);
35233 fn_type = TREE_TYPE (decl);
35234 func_type = build_function_type (TREE_TYPE (fn_type),
35235 TYPE_ARG_TYPES (fn_type));
35237 func_decl = build_fn_decl (func_name, func_type);
35238 XDELETEVEC (func_name);
35239 TREE_USED (func_decl) = 1;
35240 DECL_CONTEXT (func_decl) = NULL_TREE;
35241 DECL_INITIAL (func_decl) = error_mark_node;
35242 DECL_ARTIFICIAL (func_decl) = 1;
35243 /* Mark this func as external, the resolver will flip it again if
35244 it gets generated. */
35245 DECL_EXTERNAL (func_decl) = 1;
35246 /* IFUNCs have to be externally visible, so make the dispatcher public. */
35247 TREE_PUBLIC (func_decl) = 1;
35249 return func_decl;
35252 #endif
35254 /* Returns true if DECL is multi-versioned and is the default function,
35255 that is, it is not tagged with a target-specific optimization. */
35257 static bool
35258 is_function_default_version (const tree decl)
35260 if (TREE_CODE (decl) != FUNCTION_DECL
35261 || !DECL_FUNCTION_VERSIONED (decl))
35262 return false;
35263 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35264 gcc_assert (attr);
35265 attr = TREE_VALUE (TREE_VALUE (attr));
35266 return (TREE_CODE (attr) == STRING_CST
35267 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35270 /* Make a dispatcher declaration for the multi-versioned function DECL.
35271 Calls to DECL will be replaced with calls to the dispatcher
35272 by the front end.  Returns the decl of the dispatcher function. */
35274 static tree
35275 ix86_get_function_versions_dispatcher (void *decl)
35277 tree fn = (tree) decl;
35278 struct cgraph_node *node = NULL;
35279 struct cgraph_node *default_node = NULL;
35280 struct cgraph_function_version_info *node_v = NULL;
35281 struct cgraph_function_version_info *first_v = NULL;
35283 tree dispatch_decl = NULL;
35285 struct cgraph_function_version_info *default_version_info = NULL;
35287 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35289 node = cgraph_node::get (fn);
35290 gcc_assert (node != NULL);
35292 node_v = node->function_version ();
35293 gcc_assert (node_v != NULL);
35295 if (node_v->dispatcher_resolver != NULL)
35296 return node_v->dispatcher_resolver;
35298 /* Find the default version and make it the first node. */
35299 first_v = node_v;
35300 /* Go to the beginning of the chain. */
35301 while (first_v->prev != NULL)
35302 first_v = first_v->prev;
35303 default_version_info = first_v;
35304 while (default_version_info != NULL)
35306 if (is_function_default_version
35307 (default_version_info->this_node->decl))
35308 break;
35309 default_version_info = default_version_info->next;
35312 /* If there is no default node, just return NULL. */
35313 if (default_version_info == NULL)
35314 return NULL;
35316 /* Make default info the first node. */
35317 if (first_v != default_version_info)
35319 default_version_info->prev->next = default_version_info->next;
35320 if (default_version_info->next)
35321 default_version_info->next->prev = default_version_info->prev;
35322 first_v->prev = default_version_info;
35323 default_version_info->next = first_v;
35324 default_version_info->prev = NULL;
35327 default_node = default_version_info->this_node;
35329 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35330 if (targetm.has_ifunc_p ())
35332 struct cgraph_function_version_info *it_v = NULL;
35333 struct cgraph_node *dispatcher_node = NULL;
35334 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35336 /* Right now, the dispatching is done via ifunc. */
35337 dispatch_decl = make_dispatcher_decl (default_node->decl);
35339 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35340 gcc_assert (dispatcher_node != NULL);
35341 dispatcher_node->dispatcher_function = 1;
35342 dispatcher_version_info
35343 = dispatcher_node->insert_new_function_version ();
35344 dispatcher_version_info->next = default_version_info;
35345 dispatcher_node->definition = 1;
35347 /* Set the dispatcher for all the versions. */
35348 it_v = default_version_info;
35349 while (it_v != NULL)
35351 it_v->dispatcher_resolver = dispatch_decl;
35352 it_v = it_v->next;
35355 else
35356 #endif
35358 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35359 "multiversioning needs ifunc which is not supported "
35360 "on this target");
35363 return dispatch_decl;
35366 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35367 it to CHAIN. */
35369 static tree
35370 make_attribute (const char *name, const char *arg_name, tree chain)
35372 tree attr_name;
35373 tree attr_arg_name;
35374 tree attr_args;
35375 tree attr;
35377 attr_name = get_identifier (name);
35378 attr_arg_name = build_string (strlen (arg_name), arg_name);
35379 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35380 attr = tree_cons (attr_name, attr_args, chain);
35381 return attr;
35384 /* Make the resolver function decl to dispatch the versions of
35385 a multi-versioned function, DEFAULT_DECL. Create an
35386 empty basic block in the resolver and store the pointer in
35387 EMPTY_BB. Return the decl of the resolver function. */
35389 static tree
35390 make_resolver_func (const tree default_decl,
35391 const tree dispatch_decl,
35392 basic_block *empty_bb)
35394 char *resolver_name;
35395 tree decl, type, decl_name, t;
35396 bool is_uniq = false;
35398 /* IFUNCs have to be globally visible.  So, if the default_decl is
35399 not, then the name of the IFUNC should be made unique. */
35400 if (TREE_PUBLIC (default_decl) == 0)
35401 is_uniq = true;
35403 /* Append the filename to the resolver function if the versions are
35404 not externally visible. This is because the resolver function has
35405 to be externally visible for the loader to find it. So, appending
35406 the filename will prevent conflicts with a resolver function from
35407 another module which is based on the same version name. */
35408 resolver_name = make_name (default_decl, "resolver", is_uniq);
35410 /* The resolver function should return a (void *). */
35411 type = build_function_type_list (ptr_type_node, NULL_TREE);
35413 decl = build_fn_decl (resolver_name, type);
35414 decl_name = get_identifier (resolver_name);
35415 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35417 DECL_NAME (decl) = decl_name;
35418 TREE_USED (decl) = 1;
35419 DECL_ARTIFICIAL (decl) = 1;
35420 DECL_IGNORED_P (decl) = 0;
35421 /* IFUNC resolvers have to be externally visible. */
35422 TREE_PUBLIC (decl) = 1;
35423 DECL_UNINLINABLE (decl) = 1;
35425 /* Resolver is not external, body is generated. */
35426 DECL_EXTERNAL (decl) = 0;
35427 DECL_EXTERNAL (dispatch_decl) = 0;
35429 DECL_CONTEXT (decl) = NULL_TREE;
35430 DECL_INITIAL (decl) = make_node (BLOCK);
35431 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35433 if (DECL_COMDAT_GROUP (default_decl)
35434 || TREE_PUBLIC (default_decl))
35436 /* In this case, each translation unit with a call to this
35437 versioned function will put out a resolver. Ensure it
35438 is comdat to keep just one copy. */
35439 DECL_COMDAT (decl) = 1;
35440 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35442 /* Build result decl and add to function_decl. */
35443 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35444 DECL_ARTIFICIAL (t) = 1;
35445 DECL_IGNORED_P (t) = 1;
35446 DECL_RESULT (decl) = t;
35448 gimplify_function_tree (decl);
35449 push_cfun (DECL_STRUCT_FUNCTION (decl));
35450 *empty_bb = init_lowered_empty_function (decl, false, 0);
35452 cgraph_node::add_new_function (decl, true);
35453 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35455 pop_cfun ();
35457 gcc_assert (dispatch_decl != NULL);
35458 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35459 DECL_ATTRIBUTES (dispatch_decl)
35460 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35462 /* Create the alias for dispatch to resolver here. */
35463 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35464 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35465 XDELETEVEC (resolver_name);
35466 return decl;
35469 /* Generate the dispatching code body to dispatch multi-versioned function
35470 DECL. The target hook is called to process the "target" attributes and
35471 provide the code to dispatch the right function at run-time. NODE points
35472 to the dispatcher decl whose body will be created. */
35474 static tree
35475 ix86_generate_version_dispatcher_body (void *node_p)
35477 tree resolver_decl;
35478 basic_block empty_bb;
35479 tree default_ver_decl;
35480 struct cgraph_node *versn;
35481 struct cgraph_node *node;
35483 struct cgraph_function_version_info *node_version_info = NULL;
35484 struct cgraph_function_version_info *versn_info = NULL;
35486 node = (cgraph_node *)node_p;
35488 node_version_info = node->function_version ();
35489 gcc_assert (node->dispatcher_function
35490 && node_version_info != NULL);
35492 if (node_version_info->dispatcher_resolver)
35493 return node_version_info->dispatcher_resolver;
35495 /* The first version in the chain corresponds to the default version. */
35496 default_ver_decl = node_version_info->next->this_node->decl;
35498 /* node is going to be an alias, so remove the finalized bit. */
35499 node->definition = false;
35501 resolver_decl = make_resolver_func (default_ver_decl,
35502 node->decl, &empty_bb);
35504 node_version_info->dispatcher_resolver = resolver_decl;
35506 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35508 auto_vec<tree, 2> fn_ver_vec;
35510 for (versn_info = node_version_info->next; versn_info;
35511 versn_info = versn_info->next)
35513 versn = versn_info->this_node;
35514 /* Check for virtual functions here again, as by this time it should
35515 have been determined if this function needs a vtable index or
35516 not. This happens for methods in derived classes that override
35517 virtual methods in base classes but are not explicitly marked as
35518 virtual. */
35519 if (DECL_VINDEX (versn->decl))
35520 sorry ("Virtual function multiversioning not supported");
35522 fn_ver_vec.safe_push (versn->decl);
35525 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35526 cgraph_edge::rebuild_edges ();
35527 pop_cfun ();
35528 return resolver_decl;
35530 /* This builds the processor_model struct type defined in
35531 libgcc/config/i386/cpuinfo.c */
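/* The layout built below is meant to mirror (roughly) the definition in
   libgcc/config/i386/cpuinfo.c:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];   /* feature bit mask */
     };

   Any change here must stay in sync with that file.  */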
35533 static tree
35534 build_processor_model_struct (void)
35536 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35537 "__cpu_features"};
35538 tree field = NULL_TREE, field_chain = NULL_TREE;
35539 int i;
35540 tree type = make_node (RECORD_TYPE);
35542 /* The first 3 fields are unsigned int. */
35543 for (i = 0; i < 3; ++i)
35545 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35546 get_identifier (field_name[i]), unsigned_type_node);
35547 if (field_chain != NULL_TREE)
35548 DECL_CHAIN (field) = field_chain;
35549 field_chain = field;
35552 /* The last field is an array of unsigned integers of size one. */
35553 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35554 get_identifier (field_name[3]),
35555 build_array_type (unsigned_type_node,
35556 build_index_type (size_one_node)));
35557 if (field_chain != NULL_TREE)
35558 DECL_CHAIN (field) = field_chain;
35559 field_chain = field;
35561 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35562 return type;
35565 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35567 static tree
35568 make_var_decl (tree type, const char *name)
35570 tree new_decl;
35572 new_decl = build_decl (UNKNOWN_LOCATION,
35573 VAR_DECL,
35574 get_identifier(name),
35575 type);
35577 DECL_EXTERNAL (new_decl) = 1;
35578 TREE_STATIC (new_decl) = 1;
35579 TREE_PUBLIC (new_decl) = 1;
35580 DECL_INITIAL (new_decl) = 0;
35581 DECL_ARTIFICIAL (new_decl) = 0;
35582 DECL_PRESERVE_P (new_decl) = 1;
35584 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35585 assemble_variable (new_decl, 0, 0, 0);
35587 return new_decl;
35590 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35591 into a check against the data defined in libgcc/config/i386/cpuinfo.c. */
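/* Illustrative folds (sketches of the trees built below, not verbatim dumps):

     __builtin_cpu_is ("amd")        ->  (__cpu_model.__cpu_vendor == M_AMD)
     __builtin_cpu_supports ("avx")  ->  (__cpu_model.__cpu_features[0]
                                          & (1 << F_AVX))

   with the result converted back to int.  */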
35593 static tree
35594 fold_builtin_cpu (tree fndecl, tree *args)
35596 unsigned int i;
35597 enum ix86_builtins fn_code = (enum ix86_builtins)
35598 DECL_FUNCTION_CODE (fndecl);
35599 tree param_string_cst = NULL;
35601 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35602 enum processor_features
35604 F_CMOV = 0,
35605 F_MMX,
35606 F_POPCNT,
35607 F_SSE,
35608 F_SSE2,
35609 F_SSE3,
35610 F_SSSE3,
35611 F_SSE4_1,
35612 F_SSE4_2,
35613 F_AVX,
35614 F_AVX2,
35615 F_SSE4_A,
35616 F_FMA4,
35617 F_XOP,
35618 F_FMA,
35619 F_AVX512F,
35620 F_BMI,
35621 F_BMI2,
35622 F_MAX
35625 /* These are the values for vendor types and cpu types and subtypes
35626 in cpuinfo.c.  The corresponding start value must be subtracted
35627 from cpu types and subtypes before use. */
35628 enum processor_model
35630 M_INTEL = 1,
35631 M_AMD,
35632 M_CPU_TYPE_START,
35633 M_INTEL_BONNELL,
35634 M_INTEL_CORE2,
35635 M_INTEL_COREI7,
35636 M_AMDFAM10H,
35637 M_AMDFAM15H,
35638 M_INTEL_SILVERMONT,
35639 M_INTEL_KNL,
35640 M_AMD_BTVER1,
35641 M_AMD_BTVER2,
35642 M_CPU_SUBTYPE_START,
35643 M_INTEL_COREI7_NEHALEM,
35644 M_INTEL_COREI7_WESTMERE,
35645 M_INTEL_COREI7_SANDYBRIDGE,
35646 M_AMDFAM10H_BARCELONA,
35647 M_AMDFAM10H_SHANGHAI,
35648 M_AMDFAM10H_ISTANBUL,
35649 M_AMDFAM15H_BDVER1,
35650 M_AMDFAM15H_BDVER2,
35651 M_AMDFAM15H_BDVER3,
35652 M_AMDFAM15H_BDVER4,
35653 M_INTEL_COREI7_IVYBRIDGE,
35654 M_INTEL_COREI7_HASWELL,
35655 M_INTEL_COREI7_BROADWELL
35658 static struct _arch_names_table
35660 const char *const name;
35661 const enum processor_model model;
35663 const arch_names_table[] =
35665 {"amd", M_AMD},
35666 {"intel", M_INTEL},
35667 {"atom", M_INTEL_BONNELL},
35668 {"slm", M_INTEL_SILVERMONT},
35669 {"core2", M_INTEL_CORE2},
35670 {"corei7", M_INTEL_COREI7},
35671 {"nehalem", M_INTEL_COREI7_NEHALEM},
35672 {"westmere", M_INTEL_COREI7_WESTMERE},
35673 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35674 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35675 {"haswell", M_INTEL_COREI7_HASWELL},
35676 {"broadwell", M_INTEL_COREI7_BROADWELL},
35677 {"bonnell", M_INTEL_BONNELL},
35678 {"silvermont", M_INTEL_SILVERMONT},
35679 {"knl", M_INTEL_KNL},
35680 {"amdfam10h", M_AMDFAM10H},
35681 {"barcelona", M_AMDFAM10H_BARCELONA},
35682 {"shanghai", M_AMDFAM10H_SHANGHAI},
35683 {"istanbul", M_AMDFAM10H_ISTANBUL},
35684 {"btver1", M_AMD_BTVER1},
35685 {"amdfam15h", M_AMDFAM15H},
35686 {"bdver1", M_AMDFAM15H_BDVER1},
35687 {"bdver2", M_AMDFAM15H_BDVER2},
35688 {"bdver3", M_AMDFAM15H_BDVER3},
35689 {"bdver4", M_AMDFAM15H_BDVER4},
35690 {"btver2", M_AMD_BTVER2},
35693 static struct _isa_names_table
35695 const char *const name;
35696 const enum processor_features feature;
35698 const isa_names_table[] =
35700 {"cmov", F_CMOV},
35701 {"mmx", F_MMX},
35702 {"popcnt", F_POPCNT},
35703 {"sse", F_SSE},
35704 {"sse2", F_SSE2},
35705 {"sse3", F_SSE3},
35706 {"ssse3", F_SSSE3},
35707 {"sse4a", F_SSE4_A},
35708 {"sse4.1", F_SSE4_1},
35709 {"sse4.2", F_SSE4_2},
35710 {"avx", F_AVX},
35711 {"fma4", F_FMA4},
35712 {"xop", F_XOP},
35713 {"fma", F_FMA},
35714 {"avx2", F_AVX2},
35715 {"avx512f",F_AVX512F},
35716 {"bmi", F_BMI},
35717 {"bmi2", F_BMI2}
35720 tree __processor_model_type = build_processor_model_struct ();
35721 tree __cpu_model_var = make_var_decl (__processor_model_type,
35722 "__cpu_model");
35725 varpool_node::add (__cpu_model_var);
35727 gcc_assert ((args != NULL) && (*args != NULL));
35729 param_string_cst = *args;
35730 while (param_string_cst
35731 && TREE_CODE (param_string_cst) != STRING_CST)
35733 /* *args must be an expr that can contain other EXPRs leading to a
35734 STRING_CST. */
35735 if (!EXPR_P (param_string_cst))
35737 error ("Parameter to builtin must be a string constant or literal");
35738 return integer_zero_node;
35740 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35743 gcc_assert (param_string_cst);
35745 if (fn_code == IX86_BUILTIN_CPU_IS)
35747 tree ref;
35748 tree field;
35749 tree final;
35751 unsigned int field_val = 0;
35752 unsigned int NUM_ARCH_NAMES
35753 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35755 for (i = 0; i < NUM_ARCH_NAMES; i++)
35756 if (strcmp (arch_names_table[i].name,
35757 TREE_STRING_POINTER (param_string_cst)) == 0)
35758 break;
35760 if (i == NUM_ARCH_NAMES)
35762 error ("Parameter to builtin not valid: %s",
35763 TREE_STRING_POINTER (param_string_cst));
35764 return integer_zero_node;
35767 field = TYPE_FIELDS (__processor_model_type);
35768 field_val = arch_names_table[i].model;
35770 /* CPU types are stored in the next field. */
35771 if (field_val > M_CPU_TYPE_START
35772 && field_val < M_CPU_SUBTYPE_START)
35774 field = DECL_CHAIN (field);
35775 field_val -= M_CPU_TYPE_START;
35778 /* CPU subtypes are stored in the next field. */
35779 if (field_val > M_CPU_SUBTYPE_START)
35781 field = DECL_CHAIN ( DECL_CHAIN (field));
35782 field_val -= M_CPU_SUBTYPE_START;
35785 /* Get the appropriate field in __cpu_model. */
35786 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35787 field, NULL_TREE);
35789 /* Check the value. */
35790 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35791 build_int_cstu (unsigned_type_node, field_val));
35792 return build1 (CONVERT_EXPR, integer_type_node, final);
35794 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35796 tree ref;
35797 tree array_elt;
35798 tree field;
35799 tree final;
35801 unsigned int field_val = 0;
35802 unsigned int NUM_ISA_NAMES
35803 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35805 for (i = 0; i < NUM_ISA_NAMES; i++)
35806 if (strcmp (isa_names_table[i].name,
35807 TREE_STRING_POINTER (param_string_cst)) == 0)
35808 break;
35810 if (i == NUM_ISA_NAMES)
35812 error ("Parameter to builtin not valid: %s",
35813 TREE_STRING_POINTER (param_string_cst));
35814 return integer_zero_node;
35817 field = TYPE_FIELDS (__processor_model_type);
35818 /* Get the last field, which is __cpu_features. */
35819 while (DECL_CHAIN (field))
35820 field = DECL_CHAIN (field);
35822 /* Get the appropriate field: __cpu_model.__cpu_features */
35823 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35824 field, NULL_TREE);
35826 /* Access the 0th element of __cpu_features array. */
35827 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35828 integer_zero_node, NULL_TREE, NULL_TREE);
35830 field_val = (1 << isa_names_table[i].feature);
35831 /* Return __cpu_model.__cpu_features[0] & field_val */
35832 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35833 build_int_cstu (unsigned_type_node, field_val));
35834 return build1 (CONVERT_EXPR, integer_type_node, final);
35836 gcc_unreachable ();
35839 static tree
35840 ix86_fold_builtin (tree fndecl, int n_args,
35841 tree *args, bool ignore ATTRIBUTE_UNUSED)
35843 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35845 enum ix86_builtins fn_code = (enum ix86_builtins)
35846 DECL_FUNCTION_CODE (fndecl);
35847 if (fn_code == IX86_BUILTIN_CPU_IS
35848 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35850 gcc_assert (n_args == 1);
35851 return fold_builtin_cpu (fndecl, args);
35855 #ifdef SUBTARGET_FOLD_BUILTIN
35856 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35857 #endif
35859 return NULL_TREE;
35862 /* Make builtins to detect cpu type and features supported. NAME is
35863 the builtin name, CODE is the builtin code, and FTYPE is the function
35864 type of the builtin. */
35866 static void
35867 make_cpu_type_builtin (const char* name, int code,
35868 enum ix86_builtin_func_type ftype, bool is_const)
35870 tree decl;
35871 tree type;
35873 type = ix86_get_builtin_func_type (ftype);
35874 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35875 NULL, NULL_TREE);
35876 gcc_assert (decl != NULL_TREE);
35877 ix86_builtins[(int) code] = decl;
35878 TREE_READONLY (decl) = is_const;
35881 /* Make builtins to get CPU type and features supported.  The created
35882 builtins are:
35884 __builtin_cpu_init (), to detect cpu type and features,
35885 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35886 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */
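/* Typical use (an illustrative user-level sketch; the called helpers are
   hypothetical and not part of this file):

     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();
     else if (__builtin_cpu_is ("corei7"))
       use_corei7_path ();
     else
       use_generic_path ();

   __builtin_cpu_init () only needs an explicit call from code that may run
   before the ordinary constructors, such as IFUNC resolvers.  */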
35889 static void
35890 ix86_init_platform_type_builtins (void)
35892 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35893 INT_FTYPE_VOID, false);
35894 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35895 INT_FTYPE_PCCHAR, true);
35896 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35897 INT_FTYPE_PCCHAR, true);
35900 /* Internal method for ix86_init_builtins. */
35902 static void
35903 ix86_init_builtins_va_builtins_abi (void)
35905 tree ms_va_ref, sysv_va_ref;
35906 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35907 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35908 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35909 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35911 if (!TARGET_64BIT)
35912 return;
35913 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35914 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35915 ms_va_ref = build_reference_type (ms_va_list_type_node);
35916 sysv_va_ref =
35917 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35919 fnvoid_va_end_ms =
35920 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35921 fnvoid_va_start_ms =
35922 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35923 fnvoid_va_end_sysv =
35924 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35925 fnvoid_va_start_sysv =
35926 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35927 NULL_TREE);
35928 fnvoid_va_copy_ms =
35929 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35930 NULL_TREE);
35931 fnvoid_va_copy_sysv =
35932 build_function_type_list (void_type_node, sysv_va_ref,
35933 sysv_va_ref, NULL_TREE);
35935 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35936 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35937 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35938 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35939 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35940 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35941 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35942 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35943 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35944 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35945 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35946 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35949 static void
35950 ix86_init_builtin_types (void)
35952 tree float128_type_node, float80_type_node;
35954 /* The __float80 type. */
35955 float80_type_node = long_double_type_node;
35956 if (TYPE_MODE (float80_type_node) != XFmode)
35958 /* The __float80 type. */
35959 float80_type_node = make_node (REAL_TYPE);
35961 TYPE_PRECISION (float80_type_node) = 80;
35962 layout_type (float80_type_node);
35964 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35966 /* The __float128 type. */
35967 float128_type_node = make_node (REAL_TYPE);
35968 TYPE_PRECISION (float128_type_node) = 128;
35969 layout_type (float128_type_node);
35970 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35972 /* This macro is built by i386-builtin-types.awk. */
35973 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35976 static void
35977 ix86_init_builtins (void)
35979 tree t;
35981 ix86_init_builtin_types ();
35983 /* Builtins to get CPU type and features. */
35984 ix86_init_platform_type_builtins ();
35986 /* TFmode support builtins. */
35987 def_builtin_const (0, "__builtin_infq",
35988 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35989 def_builtin_const (0, "__builtin_huge_valq",
35990 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35992 /* We will expand them to normal calls if SSE isn't available, since
35993 they are used by libgcc. */
35994 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35995 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35996 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35997 TREE_READONLY (t) = 1;
35998 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
36000 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
36001 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
36002 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
36003 TREE_READONLY (t) = 1;
36004 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
36006 ix86_init_tm_builtins ();
36007 ix86_init_mmx_sse_builtins ();
36008 ix86_init_mpx_builtins ();
36010 if (TARGET_LP64)
36011 ix86_init_builtins_va_builtins_abi ();
36013 #ifdef SUBTARGET_INIT_BUILTINS
36014 SUBTARGET_INIT_BUILTINS;
36015 #endif
36018 /* Return the ix86 builtin for CODE. */
36020 static tree
36021 ix86_builtin_decl (unsigned code, bool)
36023 if (code >= IX86_BUILTIN_MAX)
36024 return error_mark_node;
36026 return ix86_builtins[code];
36029 /* Errors in the source file can cause expand_expr to return const0_rtx
36030 where we expect a vector. To avoid crashing, use one of the vector
36031 clear instructions. */
36032 static rtx
36033 safe_vector_operand (rtx x, machine_mode mode)
36035 if (x == const0_rtx)
36036 x = CONST0_RTX (mode);
36037 return x;
36040 /* Fix up modeless constants to fit the required mode. */
36041 static rtx
36042 fixup_modeless_constant (rtx x, machine_mode mode)
36044 if (GET_MODE (x) == VOIDmode)
36045 x = convert_to_mode (mode, x, 1);
36046 return x;
36049 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
36051 static rtx
36052 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
36054 rtx pat;
36055 tree arg0 = CALL_EXPR_ARG (exp, 0);
36056 tree arg1 = CALL_EXPR_ARG (exp, 1);
36057 rtx op0 = expand_normal (arg0);
36058 rtx op1 = expand_normal (arg1);
36059 machine_mode tmode = insn_data[icode].operand[0].mode;
36060 machine_mode mode0 = insn_data[icode].operand[1].mode;
36061 machine_mode mode1 = insn_data[icode].operand[2].mode;
36063 if (VECTOR_MODE_P (mode0))
36064 op0 = safe_vector_operand (op0, mode0);
36065 if (VECTOR_MODE_P (mode1))
36066 op1 = safe_vector_operand (op1, mode1);
36068 if (optimize || !target
36069 || GET_MODE (target) != tmode
36070 || !insn_data[icode].operand[0].predicate (target, tmode))
36071 target = gen_reg_rtx (tmode);
36073 if (GET_MODE (op1) == SImode && mode1 == TImode)
36075 rtx x = gen_reg_rtx (V4SImode);
36076 emit_insn (gen_sse2_loadd (x, op1));
36077 op1 = gen_lowpart (TImode, x);
36080 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36081 op0 = copy_to_mode_reg (mode0, op0);
36082 if (!insn_data[icode].operand[2].predicate (op1, mode1))
36083 op1 = copy_to_mode_reg (mode1, op1);
36085 pat = GEN_FCN (icode) (target, op0, op1);
36086 if (! pat)
36087 return 0;
36089 emit_insn (pat);
36091 return target;
36094 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
36096 static rtx
36097 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
36098 enum ix86_builtin_func_type m_type,
36099 enum rtx_code sub_code)
36101 rtx pat;
36102 int i;
36103 int nargs;
36104 bool comparison_p = false;
36105 bool tf_p = false;
36106 bool last_arg_constant = false;
36107 int num_memory = 0;
36108 struct {
36109 rtx op;
36110 machine_mode mode;
36111 } args[4];
36113 machine_mode tmode = insn_data[icode].operand[0].mode;
36115 switch (m_type)
36117 case MULTI_ARG_4_DF2_DI_I:
36118 case MULTI_ARG_4_DF2_DI_I1:
36119 case MULTI_ARG_4_SF2_SI_I:
36120 case MULTI_ARG_4_SF2_SI_I1:
36121 nargs = 4;
36122 last_arg_constant = true;
36123 break;
36125 case MULTI_ARG_3_SF:
36126 case MULTI_ARG_3_DF:
36127 case MULTI_ARG_3_SF2:
36128 case MULTI_ARG_3_DF2:
36129 case MULTI_ARG_3_DI:
36130 case MULTI_ARG_3_SI:
36131 case MULTI_ARG_3_SI_DI:
36132 case MULTI_ARG_3_HI:
36133 case MULTI_ARG_3_HI_SI:
36134 case MULTI_ARG_3_QI:
36135 case MULTI_ARG_3_DI2:
36136 case MULTI_ARG_3_SI2:
36137 case MULTI_ARG_3_HI2:
36138 case MULTI_ARG_3_QI2:
36139 nargs = 3;
36140 break;
36142 case MULTI_ARG_2_SF:
36143 case MULTI_ARG_2_DF:
36144 case MULTI_ARG_2_DI:
36145 case MULTI_ARG_2_SI:
36146 case MULTI_ARG_2_HI:
36147 case MULTI_ARG_2_QI:
36148 nargs = 2;
36149 break;
36151 case MULTI_ARG_2_DI_IMM:
36152 case MULTI_ARG_2_SI_IMM:
36153 case MULTI_ARG_2_HI_IMM:
36154 case MULTI_ARG_2_QI_IMM:
36155 nargs = 2;
36156 last_arg_constant = true;
36157 break;
36159 case MULTI_ARG_1_SF:
36160 case MULTI_ARG_1_DF:
36161 case MULTI_ARG_1_SF2:
36162 case MULTI_ARG_1_DF2:
36163 case MULTI_ARG_1_DI:
36164 case MULTI_ARG_1_SI:
36165 case MULTI_ARG_1_HI:
36166 case MULTI_ARG_1_QI:
36167 case MULTI_ARG_1_SI_DI:
36168 case MULTI_ARG_1_HI_DI:
36169 case MULTI_ARG_1_HI_SI:
36170 case MULTI_ARG_1_QI_DI:
36171 case MULTI_ARG_1_QI_SI:
36172 case MULTI_ARG_1_QI_HI:
36173 nargs = 1;
36174 break;
36176 case MULTI_ARG_2_DI_CMP:
36177 case MULTI_ARG_2_SI_CMP:
36178 case MULTI_ARG_2_HI_CMP:
36179 case MULTI_ARG_2_QI_CMP:
36180 nargs = 2;
36181 comparison_p = true;
36182 break;
36184 case MULTI_ARG_2_SF_TF:
36185 case MULTI_ARG_2_DF_TF:
36186 case MULTI_ARG_2_DI_TF:
36187 case MULTI_ARG_2_SI_TF:
36188 case MULTI_ARG_2_HI_TF:
36189 case MULTI_ARG_2_QI_TF:
36190 nargs = 2;
36191 tf_p = true;
36192 break;
36194 default:
36195 gcc_unreachable ();
36198 if (optimize || !target
36199 || GET_MODE (target) != tmode
36200 || !insn_data[icode].operand[0].predicate (target, tmode))
36201 target = gen_reg_rtx (tmode);
36203 gcc_assert (nargs <= 4);
36205 for (i = 0; i < nargs; i++)
36207 tree arg = CALL_EXPR_ARG (exp, i);
36208 rtx op = expand_normal (arg);
36209 int adjust = (comparison_p) ? 1 : 0;
36210 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36212 if (last_arg_constant && i == nargs - 1)
36214 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36216 enum insn_code new_icode = icode;
36217 switch (icode)
36219 case CODE_FOR_xop_vpermil2v2df3:
36220 case CODE_FOR_xop_vpermil2v4sf3:
36221 case CODE_FOR_xop_vpermil2v4df3:
36222 case CODE_FOR_xop_vpermil2v8sf3:
36223 error ("the last argument must be a 2-bit immediate");
36224 return gen_reg_rtx (tmode);
36225 case CODE_FOR_xop_rotlv2di3:
36226 new_icode = CODE_FOR_rotlv2di3;
36227 goto xop_rotl;
36228 case CODE_FOR_xop_rotlv4si3:
36229 new_icode = CODE_FOR_rotlv4si3;
36230 goto xop_rotl;
36231 case CODE_FOR_xop_rotlv8hi3:
36232 new_icode = CODE_FOR_rotlv8hi3;
36233 goto xop_rotl;
36234 case CODE_FOR_xop_rotlv16qi3:
36235 new_icode = CODE_FOR_rotlv16qi3;
36236 xop_rotl:
36237 if (CONST_INT_P (op))
36239 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36240 op = GEN_INT (INTVAL (op) & mask);
36241 gcc_checking_assert
36242 (insn_data[icode].operand[i + 1].predicate (op, mode));
36244 else
36246 gcc_checking_assert
36247 (nargs == 2
36248 && insn_data[new_icode].operand[0].mode == tmode
36249 && insn_data[new_icode].operand[1].mode == tmode
36250 && insn_data[new_icode].operand[2].mode == mode
36251 && insn_data[new_icode].operand[0].predicate
36252 == insn_data[icode].operand[0].predicate
36253 && insn_data[new_icode].operand[1].predicate
36254 == insn_data[icode].operand[1].predicate);
36255 icode = new_icode;
36256 goto non_constant;
36258 break;
36259 default:
36260 gcc_unreachable ();
36264 else
36266 non_constant:
36267 if (VECTOR_MODE_P (mode))
36268 op = safe_vector_operand (op, mode);
36270 /* If we aren't optimizing, only allow one memory operand to be
36271 generated. */
36272 if (memory_operand (op, mode))
36273 num_memory++;
36275 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36277 if (optimize
36278 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36279 || num_memory > 1)
36280 op = force_reg (mode, op);
36283 args[i].op = op;
36284 args[i].mode = mode;
36287 switch (nargs)
36289 case 1:
36290 pat = GEN_FCN (icode) (target, args[0].op);
36291 break;
36293 case 2:
36294 if (tf_p)
36295 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36296 GEN_INT ((int)sub_code));
36297 else if (! comparison_p)
36298 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36299 else
36301 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36302 args[0].op,
36303 args[1].op);
36305 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36307 break;
36309 case 3:
36310 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36311 break;
36313 case 4:
36314 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36315 break;
36317 default:
36318 gcc_unreachable ();
36321 if (! pat)
36322 return 0;
36324 emit_insn (pat);
36325 return target;
36328 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36329 insns with vec_merge. */
36331 static rtx
36332 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36333 rtx target)
36335 rtx pat;
36336 tree arg0 = CALL_EXPR_ARG (exp, 0);
36337 rtx op1, op0 = expand_normal (arg0);
36338 machine_mode tmode = insn_data[icode].operand[0].mode;
36339 machine_mode mode0 = insn_data[icode].operand[1].mode;
36341 if (optimize || !target
36342 || GET_MODE (target) != tmode
36343 || !insn_data[icode].operand[0].predicate (target, tmode))
36344 target = gen_reg_rtx (tmode);
36346 if (VECTOR_MODE_P (mode0))
36347 op0 = safe_vector_operand (op0, mode0);
36349 if ((optimize && !register_operand (op0, mode0))
36350 || !insn_data[icode].operand[1].predicate (op0, mode0))
36351 op0 = copy_to_mode_reg (mode0, op0);
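/* The scalar unop vec_merge patterns take the source twice: operand 1
   feeds the operation and operand 2 supplies the untouched upper
   elements, so the same value is used for both.  */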
36353 op1 = op0;
36354 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36355 op1 = copy_to_mode_reg (mode0, op1);
36357 pat = GEN_FCN (icode) (target, op0, op1);
36358 if (! pat)
36359 return 0;
36360 emit_insn (pat);
36361 return target;
36364 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36366 static rtx
36367 ix86_expand_sse_compare (const struct builtin_description *d,
36368 tree exp, rtx target, bool swap)
36370 rtx pat;
36371 tree arg0 = CALL_EXPR_ARG (exp, 0);
36372 tree arg1 = CALL_EXPR_ARG (exp, 1);
36373 rtx op0 = expand_normal (arg0);
36374 rtx op1 = expand_normal (arg1);
36375 rtx op2;
36376 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36377 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36378 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36379 enum rtx_code comparison = d->comparison;
36381 if (VECTOR_MODE_P (mode0))
36382 op0 = safe_vector_operand (op0, mode0);
36383 if (VECTOR_MODE_P (mode1))
36384 op1 = safe_vector_operand (op1, mode1);
36386 /* Swap operands if we have a comparison that isn't available in
36387 hardware. */
36388 if (swap)
36389 std::swap (op0, op1);
36391 if (optimize || !target
36392 || GET_MODE (target) != tmode
36393 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36394 target = gen_reg_rtx (tmode);
36396 if ((optimize && !register_operand (op0, mode0))
36397 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36398 op0 = copy_to_mode_reg (mode0, op0);
36399 if ((optimize && !register_operand (op1, mode1))
36400 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36401 op1 = copy_to_mode_reg (mode1, op1);
36403 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36404 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36405 if (! pat)
36406 return 0;
36407 emit_insn (pat);
36408 return target;
36411 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36413 static rtx
36414 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36415 rtx target)
36417 rtx pat;
36418 tree arg0 = CALL_EXPR_ARG (exp, 0);
36419 tree arg1 = CALL_EXPR_ARG (exp, 1);
36420 rtx op0 = expand_normal (arg0);
36421 rtx op1 = expand_normal (arg1);
36422 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36423 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36424 enum rtx_code comparison = d->comparison;
36426 if (VECTOR_MODE_P (mode0))
36427 op0 = safe_vector_operand (op0, mode0);
36428 if (VECTOR_MODE_P (mode1))
36429 op1 = safe_vector_operand (op1, mode1);
36431 /* Swap operands if we have a comparison that isn't available in
36432 hardware. */
36433 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36434 std::swap (op0, op1);
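/* Materialize the result as a 0/1 value: zero an SImode pseudo and set
   its low byte from the flags comparison emitted below.  */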
36436 target = gen_reg_rtx (SImode);
36437 emit_move_insn (target, const0_rtx);
36438 target = gen_rtx_SUBREG (QImode, target, 0);
36440 if ((optimize && !register_operand (op0, mode0))
36441 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36442 op0 = copy_to_mode_reg (mode0, op0);
36443 if ((optimize && !register_operand (op1, mode1))
36444 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36445 op1 = copy_to_mode_reg (mode1, op1);
36447 pat = GEN_FCN (d->icode) (op0, op1);
36448 if (! pat)
36449 return 0;
36450 emit_insn (pat);
36451 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36452 gen_rtx_fmt_ee (comparison, QImode,
36453 SET_DEST (pat),
36454 const0_rtx)));
36456 return SUBREG_REG (target);
36459 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36461 static rtx
36462 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36463 rtx target)
36465 rtx pat;
36466 tree arg0 = CALL_EXPR_ARG (exp, 0);
36467 rtx op1, op0 = expand_normal (arg0);
36468 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36469 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36471 if (optimize || target == 0
36472 || GET_MODE (target) != tmode
36473 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36474 target = gen_reg_rtx (tmode);
36476 if (VECTOR_MODE_P (mode0))
36477 op0 = safe_vector_operand (op0, mode0);
36479 if ((optimize && !register_operand (op0, mode0))
36480 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36481 op0 = copy_to_mode_reg (mode0, op0);
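/* For the round builtins, d->comparison carries the rounding-mode
   immediate rather than an rtx comparison code.  */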
36483 op1 = GEN_INT (d->comparison);
36485 pat = GEN_FCN (d->icode) (target, op0, op1);
36486 if (! pat)
36487 return 0;
36488 emit_insn (pat);
36489 return target;
36492 static rtx
36493 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36494 tree exp, rtx target)
36496 rtx pat;
36497 tree arg0 = CALL_EXPR_ARG (exp, 0);
36498 tree arg1 = CALL_EXPR_ARG (exp, 1);
36499 rtx op0 = expand_normal (arg0);
36500 rtx op1 = expand_normal (arg1);
36501 rtx op2;
36502 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36503 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36504 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36506 if (optimize || target == 0
36507 || GET_MODE (target) != tmode
36508 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36509 target = gen_reg_rtx (tmode);
36511 op0 = safe_vector_operand (op0, mode0);
36512 op1 = safe_vector_operand (op1, mode1);
36514 if ((optimize && !register_operand (op0, mode0))
36515 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36516 op0 = copy_to_mode_reg (mode0, op0);
36517 if ((optimize && !register_operand (op1, mode1))
36518 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36519 op1 = copy_to_mode_reg (mode1, op1);
36521 op2 = GEN_INT (d->comparison);
36523 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36524 if (! pat)
36525 return 0;
36526 emit_insn (pat);
36527 return target;
36530 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36532 static rtx
36533 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36534 rtx target)
36536 rtx pat;
36537 tree arg0 = CALL_EXPR_ARG (exp, 0);
36538 tree arg1 = CALL_EXPR_ARG (exp, 1);
36539 rtx op0 = expand_normal (arg0);
36540 rtx op1 = expand_normal (arg1);
36541 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36542 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36543 enum rtx_code comparison = d->comparison;
36545 if (VECTOR_MODE_P (mode0))
36546 op0 = safe_vector_operand (op0, mode0);
36547 if (VECTOR_MODE_P (mode1))
36548 op1 = safe_vector_operand (op1, mode1);
36550 target = gen_reg_rtx (SImode);
36551 emit_move_insn (target, const0_rtx);
36552 target = gen_rtx_SUBREG (QImode, target, 0);
36554 if ((optimize && !register_operand (op0, mode0))
36555 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36556 op0 = copy_to_mode_reg (mode0, op0);
36557 if ((optimize && !register_operand (op1, mode1))
36558 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36559 op1 = copy_to_mode_reg (mode1, op1);
36561 pat = GEN_FCN (d->icode) (op0, op1);
36562 if (! pat)
36563 return 0;
36564 emit_insn (pat);
36565 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36566 gen_rtx_fmt_ee (comparison, QImode,
36567 SET_DEST (pat),
36568 const0_rtx)));
36570 return SUBREG_REG (target);
36573 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36575 static rtx
36576 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36577 tree exp, rtx target)
36579 rtx pat;
36580 tree arg0 = CALL_EXPR_ARG (exp, 0);
36581 tree arg1 = CALL_EXPR_ARG (exp, 1);
36582 tree arg2 = CALL_EXPR_ARG (exp, 2);
36583 tree arg3 = CALL_EXPR_ARG (exp, 3);
36584 tree arg4 = CALL_EXPR_ARG (exp, 4);
36585 rtx scratch0, scratch1;
36586 rtx op0 = expand_normal (arg0);
36587 rtx op1 = expand_normal (arg1);
36588 rtx op2 = expand_normal (arg2);
36589 rtx op3 = expand_normal (arg3);
36590 rtx op4 = expand_normal (arg4);
36591 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36593 tmode0 = insn_data[d->icode].operand[0].mode;
36594 tmode1 = insn_data[d->icode].operand[1].mode;
36595 modev2 = insn_data[d->icode].operand[2].mode;
36596 modei3 = insn_data[d->icode].operand[3].mode;
36597 modev4 = insn_data[d->icode].operand[4].mode;
36598 modei5 = insn_data[d->icode].operand[5].mode;
36599 modeimm = insn_data[d->icode].operand[6].mode;
36601 if (VECTOR_MODE_P (modev2))
36602 op0 = safe_vector_operand (op0, modev2);
36603 if (VECTOR_MODE_P (modev4))
36604 op2 = safe_vector_operand (op2, modev4);
36606 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36607 op0 = copy_to_mode_reg (modev2, op0);
36608 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36609 op1 = copy_to_mode_reg (modei3, op1);
36610 if ((optimize && !register_operand (op2, modev4))
36611 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36612 op2 = copy_to_mode_reg (modev4, op2);
36613 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36614 op3 = copy_to_mode_reg (modei5, op3);
36616 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36618 error ("the fifth argument must be an 8-bit immediate");
36619 return const0_rtx;
36622 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36624 if (optimize || !target
36625 || GET_MODE (target) != tmode0
36626 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36627 target = gen_reg_rtx (tmode0);
36629 scratch1 = gen_reg_rtx (tmode1);
36631 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36633 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36635 if (optimize || !target
36636 || GET_MODE (target) != tmode1
36637 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36638 target = gen_reg_rtx (tmode1);
36640 scratch0 = gen_reg_rtx (tmode0);
36642 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36644 else
36646 gcc_assert (d->flag);
36648 scratch0 = gen_reg_rtx (tmode0);
36649 scratch1 = gen_reg_rtx (tmode1);
36651 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36654 if (! pat)
36655 return 0;
36657 emit_insn (pat);
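/* The flag-only variants (pcmpestra/c/o/s/z) return a single condition
   bit; d->flag gives the CC mode of the flags register from which it is
   extracted.  */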
36659 if (d->flag)
36661 target = gen_reg_rtx (SImode);
36662 emit_move_insn (target, const0_rtx);
36663 target = gen_rtx_SUBREG (QImode, target, 0);
36665 emit_insn
36666 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36667 gen_rtx_fmt_ee (EQ, QImode,
36668 gen_rtx_REG ((machine_mode) d->flag,
36669 FLAGS_REG),
36670 const0_rtx)));
36671 return SUBREG_REG (target);
36673 else
36674 return target;
36678 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36680 static rtx
36681 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36682 tree exp, rtx target)
36684 rtx pat;
36685 tree arg0 = CALL_EXPR_ARG (exp, 0);
36686 tree arg1 = CALL_EXPR_ARG (exp, 1);
36687 tree arg2 = CALL_EXPR_ARG (exp, 2);
36688 rtx scratch0, scratch1;
36689 rtx op0 = expand_normal (arg0);
36690 rtx op1 = expand_normal (arg1);
36691 rtx op2 = expand_normal (arg2);
36692 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36694 tmode0 = insn_data[d->icode].operand[0].mode;
36695 tmode1 = insn_data[d->icode].operand[1].mode;
36696 modev2 = insn_data[d->icode].operand[2].mode;
36697 modev3 = insn_data[d->icode].operand[3].mode;
36698 modeimm = insn_data[d->icode].operand[4].mode;
36700 if (VECTOR_MODE_P (modev2))
36701 op0 = safe_vector_operand (op0, modev2);
36702 if (VECTOR_MODE_P (modev3))
36703 op1 = safe_vector_operand (op1, modev3);
36705 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36706 op0 = copy_to_mode_reg (modev2, op0);
36707 if ((optimize && !register_operand (op1, modev3))
36708 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36709 op1 = copy_to_mode_reg (modev3, op1);
36711 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36713 error ("the third argument must be an 8-bit immediate");
36714 return const0_rtx;
36717 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36719 if (optimize || !target
36720 || GET_MODE (target) != tmode0
36721 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36722 target = gen_reg_rtx (tmode0);
36724 scratch1 = gen_reg_rtx (tmode1);
36726 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36728 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36730 if (optimize || !target
36731 || GET_MODE (target) != tmode1
36732 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36733 target = gen_reg_rtx (tmode1);
36735 scratch0 = gen_reg_rtx (tmode0);
36737 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36739 else
36741 gcc_assert (d->flag);
36743 scratch0 = gen_reg_rtx (tmode0);
36744 scratch1 = gen_reg_rtx (tmode1);
36746 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36749 if (! pat)
36750 return 0;
36752 emit_insn (pat);
36754 if (d->flag)
36756 target = gen_reg_rtx (SImode);
36757 emit_move_insn (target, const0_rtx);
36758 target = gen_rtx_SUBREG (QImode, target, 0);
36760 emit_insn
36761 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36762 gen_rtx_fmt_ee (EQ, QImode,
36763 gen_rtx_REG ((machine_mode) d->flag,
36764 FLAGS_REG),
36765 const0_rtx)));
36766 return SUBREG_REG (target);
36768 else
36769 return target;
36772 /* Subroutine of ix86_expand_builtin to take care of insns with
36773 variable number of operands. */
36775 static rtx
36776 ix86_expand_args_builtin (const struct builtin_description *d,
36777 tree exp, rtx target)
36779 rtx pat, real_target;
36780 unsigned int i, nargs;
36781 unsigned int nargs_constant = 0;
36782 unsigned int mask_pos = 0;
36783 int num_memory = 0;
36784 struct
36786 rtx op;
36787 machine_mode mode;
36788 } args[6];
36789 bool last_arg_count = false;
36790 enum insn_code icode = d->icode;
36791 const struct insn_data_d *insn_p = &insn_data[icode];
36792 machine_mode tmode = insn_p->operand[0].mode;
36793 machine_mode rmode = VOIDmode;
36794 bool swap = false;
36795 enum rtx_code comparison = d->comparison;
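/* Classify the builtin by its function type: nargs_constant is the
   number of trailing operands that must be immediates, mask_pos is how
   many mask/merge operands follow that immediate (0 if none),
   last_arg_count marks shift builtins whose final operand is a count,
   and the *_CONVERT types set rmode when the insn pattern's mode
   differs from the builtin's, in which case the result is accessed
   through a subreg.  */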
36797 switch ((enum ix86_builtin_func_type) d->flag)
36799 case V2DF_FTYPE_V2DF_ROUND:
36800 case V4DF_FTYPE_V4DF_ROUND:
36801 case V4SF_FTYPE_V4SF_ROUND:
36802 case V8SF_FTYPE_V8SF_ROUND:
36803 case V4SI_FTYPE_V4SF_ROUND:
36804 case V8SI_FTYPE_V8SF_ROUND:
36805 return ix86_expand_sse_round (d, exp, target);
36806 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36807 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36808 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36809 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36810 case INT_FTYPE_V8SF_V8SF_PTEST:
36811 case INT_FTYPE_V4DI_V4DI_PTEST:
36812 case INT_FTYPE_V4DF_V4DF_PTEST:
36813 case INT_FTYPE_V4SF_V4SF_PTEST:
36814 case INT_FTYPE_V2DI_V2DI_PTEST:
36815 case INT_FTYPE_V2DF_V2DF_PTEST:
36816 return ix86_expand_sse_ptest (d, exp, target);
36817 case FLOAT128_FTYPE_FLOAT128:
36818 case FLOAT_FTYPE_FLOAT:
36819 case INT_FTYPE_INT:
36820 case UINT64_FTYPE_INT:
36821 case UINT16_FTYPE_UINT16:
36822 case INT64_FTYPE_INT64:
36823 case INT64_FTYPE_V4SF:
36824 case INT64_FTYPE_V2DF:
36825 case INT_FTYPE_V16QI:
36826 case INT_FTYPE_V8QI:
36827 case INT_FTYPE_V8SF:
36828 case INT_FTYPE_V4DF:
36829 case INT_FTYPE_V4SF:
36830 case INT_FTYPE_V2DF:
36831 case INT_FTYPE_V32QI:
36832 case V16QI_FTYPE_V16QI:
36833 case V8SI_FTYPE_V8SF:
36834 case V8SI_FTYPE_V4SI:
36835 case V8HI_FTYPE_V8HI:
36836 case V8HI_FTYPE_V16QI:
36837 case V8QI_FTYPE_V8QI:
36838 case V8SF_FTYPE_V8SF:
36839 case V8SF_FTYPE_V8SI:
36840 case V8SF_FTYPE_V4SF:
36841 case V8SF_FTYPE_V8HI:
36842 case V4SI_FTYPE_V4SI:
36843 case V4SI_FTYPE_V16QI:
36844 case V4SI_FTYPE_V4SF:
36845 case V4SI_FTYPE_V8SI:
36846 case V4SI_FTYPE_V8HI:
36847 case V4SI_FTYPE_V4DF:
36848 case V4SI_FTYPE_V2DF:
36849 case V4HI_FTYPE_V4HI:
36850 case V4DF_FTYPE_V4DF:
36851 case V4DF_FTYPE_V4SI:
36852 case V4DF_FTYPE_V4SF:
36853 case V4DF_FTYPE_V2DF:
36854 case V4SF_FTYPE_V4SF:
36855 case V4SF_FTYPE_V4SI:
36856 case V4SF_FTYPE_V8SF:
36857 case V4SF_FTYPE_V4DF:
36858 case V4SF_FTYPE_V8HI:
36859 case V4SF_FTYPE_V2DF:
36860 case V2DI_FTYPE_V2DI:
36861 case V2DI_FTYPE_V16QI:
36862 case V2DI_FTYPE_V8HI:
36863 case V2DI_FTYPE_V4SI:
36864 case V2DF_FTYPE_V2DF:
36865 case V2DF_FTYPE_V4SI:
36866 case V2DF_FTYPE_V4DF:
36867 case V2DF_FTYPE_V4SF:
36868 case V2DF_FTYPE_V2SI:
36869 case V2SI_FTYPE_V2SI:
36870 case V2SI_FTYPE_V4SF:
36871 case V2SI_FTYPE_V2SF:
36872 case V2SI_FTYPE_V2DF:
36873 case V2SF_FTYPE_V2SF:
36874 case V2SF_FTYPE_V2SI:
36875 case V32QI_FTYPE_V32QI:
36876 case V32QI_FTYPE_V16QI:
36877 case V16HI_FTYPE_V16HI:
36878 case V16HI_FTYPE_V8HI:
36879 case V8SI_FTYPE_V8SI:
36880 case V16HI_FTYPE_V16QI:
36881 case V8SI_FTYPE_V16QI:
36882 case V4DI_FTYPE_V16QI:
36883 case V8SI_FTYPE_V8HI:
36884 case V4DI_FTYPE_V8HI:
36885 case V4DI_FTYPE_V4SI:
36886 case V4DI_FTYPE_V2DI:
36887 case HI_FTYPE_HI:
36888 case HI_FTYPE_V16QI:
36889 case SI_FTYPE_V32QI:
36890 case DI_FTYPE_V64QI:
36891 case V16QI_FTYPE_HI:
36892 case V32QI_FTYPE_SI:
36893 case V64QI_FTYPE_DI:
36894 case V8HI_FTYPE_QI:
36895 case V16HI_FTYPE_HI:
36896 case V32HI_FTYPE_SI:
36897 case V4SI_FTYPE_QI:
36898 case V8SI_FTYPE_QI:
36899 case V4SI_FTYPE_HI:
36900 case V8SI_FTYPE_HI:
36901 case QI_FTYPE_V8HI:
36902 case HI_FTYPE_V16HI:
36903 case SI_FTYPE_V32HI:
36904 case QI_FTYPE_V4SI:
36905 case QI_FTYPE_V8SI:
36906 case HI_FTYPE_V16SI:
36907 case QI_FTYPE_V2DI:
36908 case QI_FTYPE_V4DI:
36909 case QI_FTYPE_V8DI:
36910 case UINT_FTYPE_V2DF:
36911 case UINT_FTYPE_V4SF:
36912 case UINT64_FTYPE_V2DF:
36913 case UINT64_FTYPE_V4SF:
36914 case V16QI_FTYPE_V8DI:
36915 case V16HI_FTYPE_V16SI:
36916 case V16SI_FTYPE_HI:
36917 case V2DI_FTYPE_QI:
36918 case V4DI_FTYPE_QI:
36919 case V16SI_FTYPE_V16SI:
36920 case V16SI_FTYPE_INT:
36921 case V16SF_FTYPE_FLOAT:
36922 case V16SF_FTYPE_V8SF:
36923 case V16SI_FTYPE_V8SI:
36924 case V16SF_FTYPE_V4SF:
36925 case V16SI_FTYPE_V4SI:
36926 case V16SF_FTYPE_V16SF:
36927 case V8HI_FTYPE_V8DI:
36928 case V8UHI_FTYPE_V8UHI:
36929 case V8SI_FTYPE_V8DI:
36930 case V8SF_FTYPE_V8DF:
36931 case V8DI_FTYPE_QI:
36932 case V8DI_FTYPE_INT64:
36933 case V8DI_FTYPE_V4DI:
36934 case V8DI_FTYPE_V8DI:
36935 case V8DF_FTYPE_DOUBLE:
36936 case V8DF_FTYPE_V4DF:
36937 case V8DF_FTYPE_V2DF:
36938 case V8DF_FTYPE_V8DF:
36939 case V8DF_FTYPE_V8SI:
36940 nargs = 1;
36941 break;
36942 case V4SF_FTYPE_V4SF_VEC_MERGE:
36943 case V2DF_FTYPE_V2DF_VEC_MERGE:
36944 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36945 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36946 case V16QI_FTYPE_V16QI_V16QI:
36947 case V16QI_FTYPE_V8HI_V8HI:
36948 case V16SI_FTYPE_V16SI_V16SI:
36949 case V16SF_FTYPE_V16SF_V16SF:
36950 case V16SF_FTYPE_V16SF_V16SI:
36951 case V8QI_FTYPE_V8QI_V8QI:
36952 case V8QI_FTYPE_V4HI_V4HI:
36953 case V8HI_FTYPE_V8HI_V8HI:
36954 case V8HI_FTYPE_V16QI_V16QI:
36955 case V8HI_FTYPE_V4SI_V4SI:
36956 case V8SF_FTYPE_V8SF_V8SF:
36957 case V8SF_FTYPE_V8SF_V8SI:
36958 case V8DI_FTYPE_V8DI_V8DI:
36959 case V8DF_FTYPE_V8DF_V8DF:
36960 case V8DF_FTYPE_V8DF_V8DI:
36961 case V4SI_FTYPE_V4SI_V4SI:
36962 case V4SI_FTYPE_V8HI_V8HI:
36963 case V4SI_FTYPE_V4SF_V4SF:
36964 case V4SI_FTYPE_V2DF_V2DF:
36965 case V4HI_FTYPE_V4HI_V4HI:
36966 case V4HI_FTYPE_V8QI_V8QI:
36967 case V4HI_FTYPE_V2SI_V2SI:
36968 case V4DF_FTYPE_V4DF_V4DF:
36969 case V4DF_FTYPE_V4DF_V4DI:
36970 case V4SF_FTYPE_V4SF_V4SF:
36971 case V4SF_FTYPE_V4SF_V4SI:
36972 case V4SF_FTYPE_V4SF_V2SI:
36973 case V4SF_FTYPE_V4SF_V2DF:
36974 case V4SF_FTYPE_V4SF_UINT:
36975 case V4SF_FTYPE_V4SF_UINT64:
36976 case V4SF_FTYPE_V4SF_DI:
36977 case V4SF_FTYPE_V4SF_SI:
36978 case V2DI_FTYPE_V2DI_V2DI:
36979 case V2DI_FTYPE_V16QI_V16QI:
36980 case V2DI_FTYPE_V4SI_V4SI:
36981 case V2UDI_FTYPE_V4USI_V4USI:
36982 case V2DI_FTYPE_V2DI_V16QI:
36983 case V2DI_FTYPE_V2DF_V2DF:
36984 case V2SI_FTYPE_V2SI_V2SI:
36985 case V2SI_FTYPE_V4HI_V4HI:
36986 case V2SI_FTYPE_V2SF_V2SF:
36987 case V2DF_FTYPE_V2DF_V2DF:
36988 case V2DF_FTYPE_V2DF_V4SF:
36989 case V2DF_FTYPE_V2DF_V2DI:
36990 case V2DF_FTYPE_V2DF_DI:
36991 case V2DF_FTYPE_V2DF_SI:
36992 case V2DF_FTYPE_V2DF_UINT:
36993 case V2DF_FTYPE_V2DF_UINT64:
36994 case V2SF_FTYPE_V2SF_V2SF:
36995 case V1DI_FTYPE_V1DI_V1DI:
36996 case V1DI_FTYPE_V8QI_V8QI:
36997 case V1DI_FTYPE_V2SI_V2SI:
36998 case V32QI_FTYPE_V16HI_V16HI:
36999 case V16HI_FTYPE_V8SI_V8SI:
37000 case V32QI_FTYPE_V32QI_V32QI:
37001 case V16HI_FTYPE_V32QI_V32QI:
37002 case V16HI_FTYPE_V16HI_V16HI:
37003 case V8SI_FTYPE_V4DF_V4DF:
37004 case V8SI_FTYPE_V8SI_V8SI:
37005 case V8SI_FTYPE_V16HI_V16HI:
37006 case V4DI_FTYPE_V4DI_V4DI:
37007 case V4DI_FTYPE_V8SI_V8SI:
37008 case V4UDI_FTYPE_V8USI_V8USI:
37009 case QI_FTYPE_V8DI_V8DI:
37010 case V8DI_FTYPE_V64QI_V64QI:
37011 case HI_FTYPE_V16SI_V16SI:
37012 if (comparison == UNKNOWN)
37013 return ix86_expand_binop_builtin (icode, exp, target);
37014 nargs = 2;
37015 break;
37016 case V4SF_FTYPE_V4SF_V4SF_SWAP:
37017 case V2DF_FTYPE_V2DF_V2DF_SWAP:
37018 gcc_assert (comparison != UNKNOWN);
37019 nargs = 2;
37020 swap = true;
37021 break;
37022 case V16HI_FTYPE_V16HI_V8HI_COUNT:
37023 case V16HI_FTYPE_V16HI_SI_COUNT:
37024 case V8SI_FTYPE_V8SI_V4SI_COUNT:
37025 case V8SI_FTYPE_V8SI_SI_COUNT:
37026 case V4DI_FTYPE_V4DI_V2DI_COUNT:
37027 case V4DI_FTYPE_V4DI_INT_COUNT:
37028 case V8HI_FTYPE_V8HI_V8HI_COUNT:
37029 case V8HI_FTYPE_V8HI_SI_COUNT:
37030 case V4SI_FTYPE_V4SI_V4SI_COUNT:
37031 case V4SI_FTYPE_V4SI_SI_COUNT:
37032 case V4HI_FTYPE_V4HI_V4HI_COUNT:
37033 case V4HI_FTYPE_V4HI_SI_COUNT:
37034 case V2DI_FTYPE_V2DI_V2DI_COUNT:
37035 case V2DI_FTYPE_V2DI_SI_COUNT:
37036 case V2SI_FTYPE_V2SI_V2SI_COUNT:
37037 case V2SI_FTYPE_V2SI_SI_COUNT:
37038 case V1DI_FTYPE_V1DI_V1DI_COUNT:
37039 case V1DI_FTYPE_V1DI_SI_COUNT:
37040 nargs = 2;
37041 last_arg_count = true;
37042 break;
37043 case UINT64_FTYPE_UINT64_UINT64:
37044 case UINT_FTYPE_UINT_UINT:
37045 case UINT_FTYPE_UINT_USHORT:
37046 case UINT_FTYPE_UINT_UCHAR:
37047 case UINT16_FTYPE_UINT16_INT:
37048 case UINT8_FTYPE_UINT8_INT:
37049 case HI_FTYPE_HI_HI:
37050 case SI_FTYPE_SI_SI:
37051 case DI_FTYPE_DI_DI:
37052 case V16SI_FTYPE_V8DF_V8DF:
37053 nargs = 2;
37054 break;
37055 case V2DI_FTYPE_V2DI_INT_CONVERT:
37056 nargs = 2;
37057 rmode = V1TImode;
37058 nargs_constant = 1;
37059 break;
37060 case V4DI_FTYPE_V4DI_INT_CONVERT:
37061 nargs = 2;
37062 rmode = V2TImode;
37063 nargs_constant = 1;
37064 break;
37065 case V8DI_FTYPE_V8DI_INT_CONVERT:
37066 nargs = 2;
37067 rmode = V4TImode;
37068 nargs_constant = 1;
37069 break;
37070 case V8HI_FTYPE_V8HI_INT:
37071 case V8HI_FTYPE_V8SF_INT:
37072 case V16HI_FTYPE_V16SF_INT:
37073 case V8HI_FTYPE_V4SF_INT:
37074 case V8SF_FTYPE_V8SF_INT:
37075 case V4SF_FTYPE_V16SF_INT:
37076 case V16SF_FTYPE_V16SF_INT:
37077 case V4SI_FTYPE_V4SI_INT:
37078 case V4SI_FTYPE_V8SI_INT:
37079 case V4HI_FTYPE_V4HI_INT:
37080 case V4DF_FTYPE_V4DF_INT:
37081 case V4DF_FTYPE_V8DF_INT:
37082 case V4SF_FTYPE_V4SF_INT:
37083 case V4SF_FTYPE_V8SF_INT:
37084 case V2DI_FTYPE_V2DI_INT:
37085 case V2DF_FTYPE_V2DF_INT:
37086 case V2DF_FTYPE_V4DF_INT:
37087 case V16HI_FTYPE_V16HI_INT:
37088 case V8SI_FTYPE_V8SI_INT:
37089 case V16SI_FTYPE_V16SI_INT:
37090 case V4SI_FTYPE_V16SI_INT:
37091 case V4DI_FTYPE_V4DI_INT:
37092 case V2DI_FTYPE_V4DI_INT:
37093 case V4DI_FTYPE_V8DI_INT:
37094 case HI_FTYPE_HI_INT:
37095 case QI_FTYPE_V4SF_INT:
37096 case QI_FTYPE_V2DF_INT:
37097 nargs = 2;
37098 nargs_constant = 1;
37099 break;
37100 case V16QI_FTYPE_V16QI_V16QI_V16QI:
37101 case V8SF_FTYPE_V8SF_V8SF_V8SF:
37102 case V4DF_FTYPE_V4DF_V4DF_V4DF:
37103 case V4SF_FTYPE_V4SF_V4SF_V4SF:
37104 case V2DF_FTYPE_V2DF_V2DF_V2DF:
37105 case V32QI_FTYPE_V32QI_V32QI_V32QI:
37106 case HI_FTYPE_V16SI_V16SI_HI:
37107 case QI_FTYPE_V8DI_V8DI_QI:
37108 case V16HI_FTYPE_V16SI_V16HI_HI:
37109 case V16QI_FTYPE_V16SI_V16QI_HI:
37110 case V16QI_FTYPE_V8DI_V16QI_QI:
37111 case V16SF_FTYPE_V16SF_V16SF_HI:
37112 case V16SF_FTYPE_V16SF_V16SF_V16SF:
37113 case V16SF_FTYPE_V16SF_V16SI_V16SF:
37114 case V16SF_FTYPE_V16SI_V16SF_HI:
37115 case V16SF_FTYPE_V16SI_V16SF_V16SF:
37116 case V16SF_FTYPE_V4SF_V16SF_HI:
37117 case V16SI_FTYPE_SI_V16SI_HI:
37118 case V16SI_FTYPE_V16HI_V16SI_HI:
37119 case V16SI_FTYPE_V16QI_V16SI_HI:
37120 case V16SI_FTYPE_V16SF_V16SI_HI:
37121 case V8SF_FTYPE_V4SF_V8SF_QI:
37122 case V4DF_FTYPE_V2DF_V4DF_QI:
37123 case V8SI_FTYPE_V4SI_V8SI_QI:
37124 case V8SI_FTYPE_SI_V8SI_QI:
37125 case V4SI_FTYPE_V4SI_V4SI_QI:
37126 case V4SI_FTYPE_SI_V4SI_QI:
37127 case V4DI_FTYPE_V2DI_V4DI_QI:
37128 case V4DI_FTYPE_DI_V4DI_QI:
37129 case V2DI_FTYPE_V2DI_V2DI_QI:
37130 case V2DI_FTYPE_DI_V2DI_QI:
37131 case V64QI_FTYPE_V64QI_V64QI_DI:
37132 case V64QI_FTYPE_V16QI_V64QI_DI:
37133 case V64QI_FTYPE_QI_V64QI_DI:
37134 case V32QI_FTYPE_V32QI_V32QI_SI:
37135 case V32QI_FTYPE_V16QI_V32QI_SI:
37136 case V32QI_FTYPE_QI_V32QI_SI:
37137 case V16QI_FTYPE_V16QI_V16QI_HI:
37138 case V16QI_FTYPE_QI_V16QI_HI:
37139 case V32HI_FTYPE_V8HI_V32HI_SI:
37140 case V32HI_FTYPE_HI_V32HI_SI:
37141 case V16HI_FTYPE_V8HI_V16HI_HI:
37142 case V16HI_FTYPE_HI_V16HI_HI:
37143 case V8HI_FTYPE_V8HI_V8HI_QI:
37144 case V8HI_FTYPE_HI_V8HI_QI:
37145 case V8SF_FTYPE_V8HI_V8SF_QI:
37146 case V4SF_FTYPE_V8HI_V4SF_QI:
37147 case V8SI_FTYPE_V8SF_V8SI_QI:
37148 case V4SI_FTYPE_V4SF_V4SI_QI:
37149 case V8DI_FTYPE_V8SF_V8DI_QI:
37150 case V4DI_FTYPE_V4SF_V4DI_QI:
37151 case V2DI_FTYPE_V4SF_V2DI_QI:
37152 case V8SF_FTYPE_V8DI_V8SF_QI:
37153 case V4SF_FTYPE_V4DI_V4SF_QI:
37154 case V4SF_FTYPE_V2DI_V4SF_QI:
37155 case V8DF_FTYPE_V8DI_V8DF_QI:
37156 case V4DF_FTYPE_V4DI_V4DF_QI:
37157 case V2DF_FTYPE_V2DI_V2DF_QI:
37158 case V16QI_FTYPE_V8HI_V16QI_QI:
37159 case V16QI_FTYPE_V16HI_V16QI_HI:
37160 case V16QI_FTYPE_V4SI_V16QI_QI:
37161 case V16QI_FTYPE_V8SI_V16QI_QI:
37162 case V8HI_FTYPE_V4SI_V8HI_QI:
37163 case V8HI_FTYPE_V8SI_V8HI_QI:
37164 case V16QI_FTYPE_V2DI_V16QI_QI:
37165 case V16QI_FTYPE_V4DI_V16QI_QI:
37166 case V8HI_FTYPE_V2DI_V8HI_QI:
37167 case V8HI_FTYPE_V4DI_V8HI_QI:
37168 case V4SI_FTYPE_V2DI_V4SI_QI:
37169 case V4SI_FTYPE_V4DI_V4SI_QI:
37170 case V32QI_FTYPE_V32HI_V32QI_SI:
37171 case HI_FTYPE_V16QI_V16QI_HI:
37172 case SI_FTYPE_V32QI_V32QI_SI:
37173 case DI_FTYPE_V64QI_V64QI_DI:
37174 case QI_FTYPE_V8HI_V8HI_QI:
37175 case HI_FTYPE_V16HI_V16HI_HI:
37176 case SI_FTYPE_V32HI_V32HI_SI:
37177 case QI_FTYPE_V4SI_V4SI_QI:
37178 case QI_FTYPE_V8SI_V8SI_QI:
37179 case QI_FTYPE_V2DI_V2DI_QI:
37180 case QI_FTYPE_V4DI_V4DI_QI:
37181 case V4SF_FTYPE_V2DF_V4SF_QI:
37182 case V4SF_FTYPE_V4DF_V4SF_QI:
37183 case V16SI_FTYPE_V16SI_V16SI_HI:
37184 case V16SI_FTYPE_V16SI_V16SI_V16SI:
37185 case V16SI_FTYPE_V4SI_V16SI_HI:
37186 case V2DI_FTYPE_V2DI_V2DI_V2DI:
37187 case V2DI_FTYPE_V4SI_V2DI_QI:
37188 case V2DI_FTYPE_V8HI_V2DI_QI:
37189 case V2DI_FTYPE_V16QI_V2DI_QI:
37190 case V4DI_FTYPE_V4DI_V4DI_QI:
37191 case V4DI_FTYPE_V4SI_V4DI_QI:
37192 case V4DI_FTYPE_V8HI_V4DI_QI:
37193 case V4DI_FTYPE_V16QI_V4DI_QI:
37194 case V8DI_FTYPE_V8DF_V8DI_QI:
37195 case V4DI_FTYPE_V4DF_V4DI_QI:
37196 case V2DI_FTYPE_V2DF_V2DI_QI:
37197 case V4SI_FTYPE_V4DF_V4SI_QI:
37198 case V4SI_FTYPE_V2DF_V4SI_QI:
37199 case V4SI_FTYPE_V8HI_V4SI_QI:
37200 case V4SI_FTYPE_V16QI_V4SI_QI:
37201 case V8SI_FTYPE_V8SI_V8SI_V8SI:
37202 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37203 case V8DF_FTYPE_V2DF_V8DF_QI:
37204 case V8DF_FTYPE_V4DF_V8DF_QI:
37205 case V8DF_FTYPE_V8DF_V8DF_QI:
37206 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37207 case V8SF_FTYPE_V8SF_V8SF_QI:
37208 case V8SF_FTYPE_V8SI_V8SF_QI:
37209 case V4DF_FTYPE_V4DF_V4DF_QI:
37210 case V4SF_FTYPE_V4SF_V4SF_QI:
37211 case V2DF_FTYPE_V2DF_V2DF_QI:
37212 case V2DF_FTYPE_V4SF_V2DF_QI:
37213 case V2DF_FTYPE_V4SI_V2DF_QI:
37214 case V4SF_FTYPE_V4SI_V4SF_QI:
37215 case V4DF_FTYPE_V4SF_V4DF_QI:
37216 case V4DF_FTYPE_V4SI_V4DF_QI:
37217 case V8SI_FTYPE_V8SI_V8SI_QI:
37218 case V8SI_FTYPE_V8HI_V8SI_QI:
37219 case V8SI_FTYPE_V16QI_V8SI_QI:
37220 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37221 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37222 case V8DF_FTYPE_V8SF_V8DF_QI:
37223 case V8DF_FTYPE_V8SI_V8DF_QI:
37224 case V8DI_FTYPE_DI_V8DI_QI:
37225 case V16SF_FTYPE_V8SF_V16SF_HI:
37226 case V16SI_FTYPE_V8SI_V16SI_HI:
37227 case V16HI_FTYPE_V16HI_V16HI_HI:
37228 case V8HI_FTYPE_V16QI_V8HI_QI:
37229 case V16HI_FTYPE_V16QI_V16HI_HI:
37230 case V32HI_FTYPE_V32HI_V32HI_SI:
37231 case V32HI_FTYPE_V32QI_V32HI_SI:
37232 case V8DI_FTYPE_V16QI_V8DI_QI:
37233 case V8DI_FTYPE_V2DI_V8DI_QI:
37234 case V8DI_FTYPE_V4DI_V8DI_QI:
37235 case V8DI_FTYPE_V8DI_V8DI_QI:
37236 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37237 case V8DI_FTYPE_V8HI_V8DI_QI:
37238 case V8DI_FTYPE_V8SI_V8DI_QI:
37239 case V8HI_FTYPE_V8DI_V8HI_QI:
37240 case V8SF_FTYPE_V8DF_V8SF_QI:
37241 case V8SI_FTYPE_V8DF_V8SI_QI:
37242 case V8SI_FTYPE_V8DI_V8SI_QI:
37243 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37244 nargs = 3;
37245 break;
37246 case V32QI_FTYPE_V32QI_V32QI_INT:
37247 case V16HI_FTYPE_V16HI_V16HI_INT:
37248 case V16QI_FTYPE_V16QI_V16QI_INT:
37249 case V4DI_FTYPE_V4DI_V4DI_INT:
37250 case V8HI_FTYPE_V8HI_V8HI_INT:
37251 case V8SI_FTYPE_V8SI_V8SI_INT:
37252 case V8SI_FTYPE_V8SI_V4SI_INT:
37253 case V8SF_FTYPE_V8SF_V8SF_INT:
37254 case V8SF_FTYPE_V8SF_V4SF_INT:
37255 case V4SI_FTYPE_V4SI_V4SI_INT:
37256 case V4DF_FTYPE_V4DF_V4DF_INT:
37257 case V16SF_FTYPE_V16SF_V16SF_INT:
37258 case V16SF_FTYPE_V16SF_V4SF_INT:
37259 case V16SI_FTYPE_V16SI_V4SI_INT:
37260 case V4DF_FTYPE_V4DF_V2DF_INT:
37261 case V4SF_FTYPE_V4SF_V4SF_INT:
37262 case V2DI_FTYPE_V2DI_V2DI_INT:
37263 case V4DI_FTYPE_V4DI_V2DI_INT:
37264 case V2DF_FTYPE_V2DF_V2DF_INT:
37265 case QI_FTYPE_V8DI_V8DI_INT:
37266 case QI_FTYPE_V8DF_V8DF_INT:
37267 case QI_FTYPE_V2DF_V2DF_INT:
37268 case QI_FTYPE_V4SF_V4SF_INT:
37269 case HI_FTYPE_V16SI_V16SI_INT:
37270 case HI_FTYPE_V16SF_V16SF_INT:
37271 nargs = 3;
37272 nargs_constant = 1;
37273 break;
37274 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37275 nargs = 3;
37276 rmode = V4DImode;
37277 nargs_constant = 1;
37278 break;
37279 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37280 nargs = 3;
37281 rmode = V2DImode;
37282 nargs_constant = 1;
37283 break;
37284 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37285 nargs = 3;
37286 rmode = DImode;
37287 nargs_constant = 1;
37288 break;
37289 case V2DI_FTYPE_V2DI_UINT_UINT:
37290 nargs = 3;
37291 nargs_constant = 2;
37292 break;
37293 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37294 nargs = 3;
37295 rmode = V8DImode;
37296 nargs_constant = 1;
37297 break;
37298 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37299 nargs = 5;
37300 rmode = V8DImode;
37301 mask_pos = 2;
37302 nargs_constant = 1;
37303 break;
37304 case QI_FTYPE_V8DF_INT_QI:
37305 case QI_FTYPE_V4DF_INT_QI:
37306 case QI_FTYPE_V2DF_INT_QI:
37307 case HI_FTYPE_V16SF_INT_HI:
37308 case QI_FTYPE_V8SF_INT_QI:
37309 case QI_FTYPE_V4SF_INT_QI:
37310 nargs = 3;
37311 mask_pos = 1;
37312 nargs_constant = 1;
37313 break;
37314 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37315 nargs = 5;
37316 rmode = V4DImode;
37317 mask_pos = 2;
37318 nargs_constant = 1;
37319 break;
37320 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37321 nargs = 5;
37322 rmode = V2DImode;
37323 mask_pos = 2;
37324 nargs_constant = 1;
37325 break;
37326 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37327 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37328 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37329 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37330 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37331 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37332 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37333 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37334 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37335 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37336 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37337 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37338 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37339 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37340 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37341 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37342 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37343 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37344 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37345 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37346 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37347 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37348 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37349 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37350 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37351 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37352 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37353 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37354 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37355 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37356 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37357 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37358 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37359 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37360 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37361 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37362 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37363 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37364 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37365 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37366 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37367 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37368 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37369 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37370 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37371 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37372 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37373 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37374 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37375 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37376 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37377 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37378 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37379 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37380 nargs = 4;
37381 break;
37382 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37383 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37384 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37385 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37386 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37387 nargs = 4;
37388 nargs_constant = 1;
37389 break;
37390 case QI_FTYPE_V4DI_V4DI_INT_QI:
37391 case QI_FTYPE_V8SI_V8SI_INT_QI:
37392 case QI_FTYPE_V4DF_V4DF_INT_QI:
37393 case QI_FTYPE_V8SF_V8SF_INT_QI:
37394 case QI_FTYPE_V2DI_V2DI_INT_QI:
37395 case QI_FTYPE_V4SI_V4SI_INT_QI:
37396 case QI_FTYPE_V2DF_V2DF_INT_QI:
37397 case QI_FTYPE_V4SF_V4SF_INT_QI:
37398 case DI_FTYPE_V64QI_V64QI_INT_DI:
37399 case SI_FTYPE_V32QI_V32QI_INT_SI:
37400 case HI_FTYPE_V16QI_V16QI_INT_HI:
37401 case SI_FTYPE_V32HI_V32HI_INT_SI:
37402 case HI_FTYPE_V16HI_V16HI_INT_HI:
37403 case QI_FTYPE_V8HI_V8HI_INT_QI:
37404 nargs = 4;
37405 mask_pos = 1;
37406 nargs_constant = 1;
37407 break;
37408 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37409 nargs = 4;
37410 nargs_constant = 2;
37411 break;
37412 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37413 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37414 nargs = 4;
37415 break;
37416 case QI_FTYPE_V8DI_V8DI_INT_QI:
37417 case HI_FTYPE_V16SI_V16SI_INT_HI:
37418 case QI_FTYPE_V8DF_V8DF_INT_QI:
37419 case HI_FTYPE_V16SF_V16SF_INT_HI:
37420 mask_pos = 1;
37421 nargs = 4;
37422 nargs_constant = 1;
37423 break;
37424 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37425 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37426 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37427 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37428 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37429 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37430 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37431 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37432 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37433 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37434 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37435 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37436 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37437 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37438 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37439 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37440 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37441 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37442 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37443 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37444 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37445 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37446 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37447 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37448 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37449 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37450 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37451 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37452 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37453 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37454 nargs = 4;
37455 mask_pos = 2;
37456 nargs_constant = 1;
37457 break;
37458 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37459 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37460 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37461 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37462 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37463 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37464 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37465 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37466 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37467 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37468 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37469 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37470 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37471 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37472 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37473 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37474 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37475 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37476 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37477 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37478 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37479 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37480 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37481 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37482 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37483 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37484 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37485 nargs = 5;
37486 mask_pos = 2;
37487 nargs_constant = 1;
37488 break;
37489 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37490 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37491 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37492 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37493 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37494 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37495 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37496 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37497 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37498 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37499 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37500 nargs = 5;
37502 mask_pos = 1;
37503 nargs_constant = 1;
37504 break;
37506 default:
37507 gcc_unreachable ();
37510 gcc_assert (nargs <= ARRAY_SIZE (args));
37512 if (comparison != UNKNOWN)
37514 gcc_assert (nargs == 2);
37515 return ix86_expand_sse_compare (d, exp, target, swap);
37518 if (rmode == VOIDmode || rmode == tmode)
37520 if (optimize
37521 || target == 0
37522 || GET_MODE (target) != tmode
37523 || !insn_p->operand[0].predicate (target, tmode))
37524 target = gen_reg_rtx (tmode);
37525 real_target = target;
37527 else
37529 real_target = gen_reg_rtx (tmode);
37530 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37533 for (i = 0; i < nargs; i++)
37535 tree arg = CALL_EXPR_ARG (exp, i);
37536 rtx op = expand_normal (arg);
37537 machine_mode mode = insn_p->operand[i + 1].mode;
37538 bool match = insn_p->operand[i + 1].predicate (op, mode);
37540 if (last_arg_count && (i + 1) == nargs)
37542 /* SIMD shift insns take either an 8-bit immediate or a
37543 register as the count, but the builtin functions take an int.
37544 If the count operand doesn't match, put it in a register. */
37545 if (!match)
37547 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37548 if (!insn_p->operand[i + 1].predicate (op, mode))
37549 op = copy_to_reg (op);
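/* Not a count operand: check whether this operand must be an
   immediate.  With mask operands present the immediate sits mask_pos
   positions before the end; otherwise the last nargs_constant operands
   are immediates.  */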
37552 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37553 (!mask_pos && (nargs - i) <= nargs_constant))
37555 if (!match)
37556 switch (icode)
37558 case CODE_FOR_avx_vinsertf128v4di:
37559 case CODE_FOR_avx_vextractf128v4di:
37560 error ("the last argument must be an 1-bit immediate");
37561 return const0_rtx;
37563 case CODE_FOR_avx512f_cmpv8di3_mask:
37564 case CODE_FOR_avx512f_cmpv16si3_mask:
37565 case CODE_FOR_avx512f_ucmpv8di3_mask:
37566 case CODE_FOR_avx512f_ucmpv16si3_mask:
37567 case CODE_FOR_avx512vl_cmpv4di3_mask:
37568 case CODE_FOR_avx512vl_cmpv8si3_mask:
37569 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37570 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37571 case CODE_FOR_avx512vl_cmpv2di3_mask:
37572 case CODE_FOR_avx512vl_cmpv4si3_mask:
37573 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37574 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37575 error ("the last argument must be a 3-bit immediate");
37576 return const0_rtx;
37578 case CODE_FOR_sse4_1_roundsd:
37579 case CODE_FOR_sse4_1_roundss:
37581 case CODE_FOR_sse4_1_roundpd:
37582 case CODE_FOR_sse4_1_roundps:
37583 case CODE_FOR_avx_roundpd256:
37584 case CODE_FOR_avx_roundps256:
37586 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37587 case CODE_FOR_sse4_1_roundps_sfix:
37588 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37589 case CODE_FOR_avx_roundps_sfix256:
37591 case CODE_FOR_sse4_1_blendps:
37592 case CODE_FOR_avx_blendpd256:
37593 case CODE_FOR_avx_vpermilv4df:
37594 case CODE_FOR_avx_vpermilv4df_mask:
37595 case CODE_FOR_avx512f_getmantv8df_mask:
37596 case CODE_FOR_avx512f_getmantv16sf_mask:
37597 case CODE_FOR_avx512vl_getmantv8sf_mask:
37598 case CODE_FOR_avx512vl_getmantv4df_mask:
37599 case CODE_FOR_avx512vl_getmantv4sf_mask:
37600 case CODE_FOR_avx512vl_getmantv2df_mask:
37601 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37602 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37603 case CODE_FOR_avx512dq_rangepv4df_mask:
37604 case CODE_FOR_avx512dq_rangepv8sf_mask:
37605 case CODE_FOR_avx512dq_rangepv2df_mask:
37606 case CODE_FOR_avx512dq_rangepv4sf_mask:
37607 case CODE_FOR_avx_shufpd256_mask:
37608 error ("the last argument must be a 4-bit immediate");
37609 return const0_rtx;
37611 case CODE_FOR_sha1rnds4:
37612 case CODE_FOR_sse4_1_blendpd:
37613 case CODE_FOR_avx_vpermilv2df:
37614 case CODE_FOR_avx_vpermilv2df_mask:
37615 case CODE_FOR_xop_vpermil2v2df3:
37616 case CODE_FOR_xop_vpermil2v4sf3:
37617 case CODE_FOR_xop_vpermil2v4df3:
37618 case CODE_FOR_xop_vpermil2v8sf3:
37619 case CODE_FOR_avx512f_vinsertf32x4_mask:
37620 case CODE_FOR_avx512f_vinserti32x4_mask:
37621 case CODE_FOR_avx512f_vextractf32x4_mask:
37622 case CODE_FOR_avx512f_vextracti32x4_mask:
37623 case CODE_FOR_sse2_shufpd:
37624 case CODE_FOR_sse2_shufpd_mask:
37625 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37626 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37627 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37628 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37629 error ("the last argument must be a 2-bit immediate");
37630 return const0_rtx;
37632 case CODE_FOR_avx_vextractf128v4df:
37633 case CODE_FOR_avx_vextractf128v8sf:
37634 case CODE_FOR_avx_vextractf128v8si:
37635 case CODE_FOR_avx_vinsertf128v4df:
37636 case CODE_FOR_avx_vinsertf128v8sf:
37637 case CODE_FOR_avx_vinsertf128v8si:
37638 case CODE_FOR_avx512f_vinsertf64x4_mask:
37639 case CODE_FOR_avx512f_vinserti64x4_mask:
37640 case CODE_FOR_avx512f_vextractf64x4_mask:
37641 case CODE_FOR_avx512f_vextracti64x4_mask:
37642 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37643 case CODE_FOR_avx512dq_vinserti32x8_mask:
37644 case CODE_FOR_avx512vl_vinsertv4df:
37645 case CODE_FOR_avx512vl_vinsertv4di:
37646 case CODE_FOR_avx512vl_vinsertv8sf:
37647 case CODE_FOR_avx512vl_vinsertv8si:
37648 error ("the last argument must be a 1-bit immediate");
37649 return const0_rtx;
37651 case CODE_FOR_avx_vmcmpv2df3:
37652 case CODE_FOR_avx_vmcmpv4sf3:
37653 case CODE_FOR_avx_cmpv2df3:
37654 case CODE_FOR_avx_cmpv4sf3:
37655 case CODE_FOR_avx_cmpv4df3:
37656 case CODE_FOR_avx_cmpv8sf3:
37657 case CODE_FOR_avx512f_cmpv8df3_mask:
37658 case CODE_FOR_avx512f_cmpv16sf3_mask:
37659 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37660 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37661 error ("the last argument must be a 5-bit immediate");
37662 return const0_rtx;
37664 default:
37665 switch (nargs_constant)
37667 case 2:
37668 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37669 (!mask_pos && (nargs - i) == nargs_constant))
37671 error ("the next to last argument must be an 8-bit immediate");
37672 break;
37674 case 1:
37675 error ("the last argument must be an 8-bit immediate");
37676 break;
37677 default:
37678 gcc_unreachable ();
37680 return const0_rtx;
37683 else
37685 if (VECTOR_MODE_P (mode))
37686 op = safe_vector_operand (op, mode);
37688 /* If we aren't optimizing, only allow one memory operand to
37689 be generated. */
37690 if (memory_operand (op, mode))
37691 num_memory++;
37693 op = fixup_modeless_constant (op, mode);
37695 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37697 if (optimize || !match || num_memory > 1)
37698 op = copy_to_mode_reg (mode, op);
37700 else
37702 op = copy_to_reg (op);
37703 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37707 args[i].op = op;
37708 args[i].mode = mode;
37711 switch (nargs)
37713 case 1:
37714 pat = GEN_FCN (icode) (real_target, args[0].op);
37715 break;
37716 case 2:
37717 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37718 break;
37719 case 3:
37720 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37721 args[2].op);
37722 break;
37723 case 4:
37724 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37725 args[2].op, args[3].op);
37726 break;
37727 case 5:
37728 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37729 args[2].op, args[3].op, args[4].op);
break;
37730 case 6:
37731 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37732 args[2].op, args[3].op, args[4].op,
37733 args[5].op);
37734 break;
37735 default:
37736 gcc_unreachable ();
37739 if (! pat)
37740 return 0;
37742 emit_insn (pat);
37743 return target;
37746 /* Transform pattern of following layout:
37747 (parallel [
37748 set (A B)
37749 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37751 into:
37752 (set (A B))
Also transform:
37755 (parallel [ A B
...
37757 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
37760 into:
37761 (parallel [ A B ... ]) */
37763 static rtx
37764 ix86_erase_embedded_rounding (rtx pat)
37766 if (GET_CODE (pat) == INSN)
37767 pat = PATTERN (pat);
37769 gcc_assert (GET_CODE (pat) == PARALLEL);
37771 if (XVECLEN (pat, 0) == 2)
37773 rtx p0 = XVECEXP (pat, 0, 0);
37774 rtx p1 = XVECEXP (pat, 0, 1);
37776 gcc_assert (GET_CODE (p0) == SET
37777 && GET_CODE (p1) == UNSPEC
37778 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37780 return p0;
37782 else
37784 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37785 int i = 0;
37786 int j = 0;
37788 for (; i < XVECLEN (pat, 0); ++i)
37790 rtx elem = XVECEXP (pat, 0, i);
37791 if (GET_CODE (elem) != UNSPEC
37792 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37793 res [j++] = elem;
37796 /* No more than 1 occurrence was removed. */
37797 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37799 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37803 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37804 with rounding. */
37805 static rtx
37806 ix86_expand_sse_comi_round (const struct builtin_description *d,
37807 tree exp, rtx target)
37809 rtx pat, set_dst;
37810 tree arg0 = CALL_EXPR_ARG (exp, 0);
37811 tree arg1 = CALL_EXPR_ARG (exp, 1);
37812 tree arg2 = CALL_EXPR_ARG (exp, 2);
37813 tree arg3 = CALL_EXPR_ARG (exp, 3);
37814 rtx op0 = expand_normal (arg0);
37815 rtx op1 = expand_normal (arg1);
37816 rtx op2 = expand_normal (arg2);
37817 rtx op3 = expand_normal (arg3);
37818 enum insn_code icode = d->icode;
37819 const struct insn_data_d *insn_p = &insn_data[icode];
37820 machine_mode mode0 = insn_p->operand[0].mode;
37821 machine_mode mode1 = insn_p->operand[1].mode;
37822 enum rtx_code comparison = UNEQ;
37823 bool need_ucomi = false;
37825 /* See avxintrin.h for values. */
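/* comi_comparisons maps each _CMP_* predicate to the rtx comparison
   applied to the flags; need_ucomi_values marks the quiet predicates,
   which must use the non-signalling ucomi form.  */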
37826 enum rtx_code comi_comparisons[32] =
37828 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37829 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37830 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37832 bool need_ucomi_values[32] =
37834 true, false, false, true, true, false, false, true,
37835 true, false, false, true, true, false, false, true,
37836 false, true, true, false, false, true, true, false,
37837 false, true, true, false, false, true, true, false
37840 if (!CONST_INT_P (op2))
37842 error ("the third argument must be comparison constant");
37843 return const0_rtx;
37845 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37847 error ("incorrect comparison mode");
37848 return const0_rtx;
37851 if (!insn_p->operand[2].predicate (op3, SImode))
37853 error ("incorrect rounding operand");
37854 return const0_rtx;
37857 comparison = comi_comparisons[INTVAL (op2)];
37858 need_ucomi = need_ucomi_values[INTVAL (op2)];
37860 if (VECTOR_MODE_P (mode0))
37861 op0 = safe_vector_operand (op0, mode0);
37862 if (VECTOR_MODE_P (mode1))
37863 op1 = safe_vector_operand (op1, mode1);
37865 target = gen_reg_rtx (SImode);
37866 emit_move_insn (target, const0_rtx);
37867 target = gen_rtx_SUBREG (QImode, target, 0);
37869 if ((optimize && !register_operand (op0, mode0))
37870 || !insn_p->operand[0].predicate (op0, mode0))
37871 op0 = copy_to_mode_reg (mode0, op0);
37872 if ((optimize && !register_operand (op1, mode1))
37873 || !insn_p->operand[1].predicate (op1, mode1))
37874 op1 = copy_to_mode_reg (mode1, op1);
37876 if (need_ucomi)
37877 icode = icode == CODE_FOR_sse_comi_round
37878 ? CODE_FOR_sse_ucomi_round
37879 : CODE_FOR_sse2_ucomi_round;
37881 pat = GEN_FCN (icode) (op0, op1, op3);
37882 if (! pat)
37883 return 0;
37885 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37886 if (INTVAL (op3) == NO_ROUND)
37888 pat = ix86_erase_embedded_rounding (pat);
37889 if (! pat)
37890 return 0;
37892 set_dst = SET_DEST (pat);
37894 else
37896 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37897 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37900 emit_insn (pat);
37901 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37902 gen_rtx_fmt_ee (comparison, QImode,
37903 set_dst,
37904 const0_rtx)));
37906 return SUBREG_REG (target);
37909 static rtx
37910 ix86_expand_round_builtin (const struct builtin_description *d,
37911 tree exp, rtx target)
37913 rtx pat;
37914 unsigned int i, nargs;
37915 struct
37917 rtx op;
37918 machine_mode mode;
37919 } args[6];
37920 enum insn_code icode = d->icode;
37921 const struct insn_data_d *insn_p = &insn_data[icode];
37922 machine_mode tmode = insn_p->operand[0].mode;
37923 unsigned int nargs_constant = 0;
37924 unsigned int redundant_embed_rnd = 0;
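/* Every round builtin takes the rounding/SAE control as its last
   argument; when it is NO_ROUND the embedded-rounding unspec is
   stripped from the expanded pattern (redundant_embed_rnd).  */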
37926 switch ((enum ix86_builtin_func_type) d->flag)
37928 case UINT64_FTYPE_V2DF_INT:
37929 case UINT64_FTYPE_V4SF_INT:
37930 case UINT_FTYPE_V2DF_INT:
37931 case UINT_FTYPE_V4SF_INT:
37932 case INT64_FTYPE_V2DF_INT:
37933 case INT64_FTYPE_V4SF_INT:
37934 case INT_FTYPE_V2DF_INT:
37935 case INT_FTYPE_V4SF_INT:
37936 nargs = 2;
37937 break;
37938 case V4SF_FTYPE_V4SF_UINT_INT:
37939 case V4SF_FTYPE_V4SF_UINT64_INT:
37940 case V2DF_FTYPE_V2DF_UINT64_INT:
37941 case V4SF_FTYPE_V4SF_INT_INT:
37942 case V4SF_FTYPE_V4SF_INT64_INT:
37943 case V2DF_FTYPE_V2DF_INT64_INT:
37944 case V4SF_FTYPE_V4SF_V4SF_INT:
37945 case V2DF_FTYPE_V2DF_V2DF_INT:
37946 case V4SF_FTYPE_V4SF_V2DF_INT:
37947 case V2DF_FTYPE_V2DF_V4SF_INT:
37948 nargs = 3;
37949 break;
37950 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37951 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37952 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37953 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37954 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37955 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37956 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37957 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37958 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37959 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37960 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37961 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37962 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37963 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37964 nargs = 4;
37965 break;
37966 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37967 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37968 nargs_constant = 2;
37969 nargs = 4;
37970 break;
37971 case INT_FTYPE_V4SF_V4SF_INT_INT:
37972 case INT_FTYPE_V2DF_V2DF_INT_INT:
37973 return ix86_expand_sse_comi_round (d, exp, target);
37974 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37975 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37976 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37977 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37978 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37979 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37980 nargs = 5;
37981 break;
37982 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37983 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37984 nargs_constant = 4;
37985 nargs = 5;
37986 break;
37987 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37988 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37989 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37990 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37991 nargs_constant = 3;
37992 nargs = 5;
37993 break;
37994 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37995 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37996 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37997 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37998 nargs = 6;
37999 nargs_constant = 4;
38000 break;
38001 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
38002 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
38003 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
38004 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
38005 nargs = 6;
38006 nargs_constant = 3;
38007 break;
38008 default:
38009 gcc_unreachable ();
38011 gcc_assert (nargs <= ARRAY_SIZE (args));
38013 if (optimize
38014 || target == 0
38015 || GET_MODE (target) != tmode
38016 || !insn_p->operand[0].predicate (target, tmode))
38017 target = gen_reg_rtx (tmode);
38019 for (i = 0; i < nargs; i++)
38021 tree arg = CALL_EXPR_ARG (exp, i);
38022 rtx op = expand_normal (arg);
38023 machine_mode mode = insn_p->operand[i + 1].mode;
38024 bool match = insn_p->operand[i + 1].predicate (op, mode);
38026 if (i == nargs - nargs_constant)
38028 if (!match)
38030 switch (icode)
38032 case CODE_FOR_avx512f_getmantv8df_mask_round:
38033 case CODE_FOR_avx512f_getmantv16sf_mask_round:
38034 case CODE_FOR_avx512f_vgetmantv2df_round:
38035 case CODE_FOR_avx512f_vgetmantv4sf_round:
38036 error ("the immediate argument must be a 4-bit immediate");
38037 return const0_rtx;
38038 case CODE_FOR_avx512f_cmpv8df3_mask_round:
38039 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
38040 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
38041 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
38042 error ("the immediate argument must be a 5-bit immediate");
38043 return const0_rtx;
38044 default:
38045 error ("the immediate argument must be an 8-bit immediate");
38046 return const0_rtx;
38050 else if (i == nargs-1)
38052 if (!insn_p->operand[nargs].predicate (op, SImode))
38054 error ("incorrect rounding operand");
38055 return const0_rtx;
38058 /* If there is no rounding, use the normal version of the pattern. */
38059 if (INTVAL (op) == NO_ROUND)
38060 redundant_embed_rnd = 1;
38062 else
38064 if (VECTOR_MODE_P (mode))
38065 op = safe_vector_operand (op, mode);
38067 op = fixup_modeless_constant (op, mode);
38069 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38071 if (optimize || !match)
38072 op = copy_to_mode_reg (mode, op);
38074 else
38076 op = copy_to_reg (op);
38077 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38081 args[i].op = op;
38082 args[i].mode = mode;
38085 switch (nargs)
38087 case 1:
38088 pat = GEN_FCN (icode) (target, args[0].op);
38089 break;
38090 case 2:
38091 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38092 break;
38093 case 3:
38094 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38095 args[2].op);
38096 break;
38097 case 4:
38098 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38099 args[2].op, args[3].op);
38100 break;
38101 case 5:
38102 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38103 args[2].op, args[3].op, args[4].op);
break;
38104 case 6:
38105 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38106 args[2].op, args[3].op, args[4].op,
38107 args[5].op);
38108 break;
38109 default:
38110 gcc_unreachable ();
38113 if (!pat)
38114 return 0;
38116 if (redundant_embed_rnd)
38117 pat = ix86_erase_embedded_rounding (pat);
38119 emit_insn (pat);
38120 return target;
38123 /* Subroutine of ix86_expand_builtin to take care of special insns
38124 with variable number of operands. */
38126 static rtx
38127 ix86_expand_special_args_builtin (const struct builtin_description *d,
38128 tree exp, rtx target)
38130 tree arg;
38131 rtx pat, op;
38132 unsigned int i, nargs, arg_adjust, memory;
38133 bool aligned_mem = false;
38134 struct
38136 rtx op;
38137 machine_mode mode;
38138 } args[3];
38139 enum insn_code icode = d->icode;
38140 bool last_arg_constant = false;
38141 const struct insn_data_d *insn_p = &insn_data[icode];
38142 machine_mode tmode = insn_p->operand[0].mode;
38143 enum { load, store } klass;
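/* klass distinguishes load-like builtins (a value is produced) from
   store-like ones (memory is written); memory is the index of the
   argument that must remain a memory reference, with ARRAY_SIZE (args)
   meaning the store destination itself; aligned_mem marks the
   non-temporal and masked moves that require properly aligned
   memory.  */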
38145 switch ((enum ix86_builtin_func_type) d->flag)
38147 case VOID_FTYPE_VOID:
38148 emit_insn (GEN_FCN (icode) (target));
38149 return 0;
38150 case VOID_FTYPE_UINT64:
38151 case VOID_FTYPE_UNSIGNED:
38152 nargs = 0;
38153 klass = store;
38154 memory = 0;
38155 break;
38157 case INT_FTYPE_VOID:
38158 case USHORT_FTYPE_VOID:
38159 case UINT64_FTYPE_VOID:
38160 case UNSIGNED_FTYPE_VOID:
38161 nargs = 0;
38162 klass = load;
38163 memory = 0;
38164 break;
38165 case UINT64_FTYPE_PUNSIGNED:
38166 case V2DI_FTYPE_PV2DI:
38167 case V4DI_FTYPE_PV4DI:
38168 case V32QI_FTYPE_PCCHAR:
38169 case V16QI_FTYPE_PCCHAR:
38170 case V8SF_FTYPE_PCV4SF:
38171 case V8SF_FTYPE_PCFLOAT:
38172 case V4SF_FTYPE_PCFLOAT:
38173 case V4DF_FTYPE_PCV2DF:
38174 case V4DF_FTYPE_PCDOUBLE:
38175 case V2DF_FTYPE_PCDOUBLE:
38176 case VOID_FTYPE_PVOID:
38177 case V16SI_FTYPE_PV4SI:
38178 case V16SF_FTYPE_PV4SF:
38179 case V8DI_FTYPE_PV4DI:
38180 case V8DI_FTYPE_PV8DI:
38181 case V8DF_FTYPE_PV4DF:
38182 nargs = 1;
38183 klass = load;
38184 memory = 0;
38185 switch (icode)
38187 case CODE_FOR_sse4_1_movntdqa:
38188 case CODE_FOR_avx2_movntdqa:
38189 case CODE_FOR_avx512f_movntdqa:
38190 aligned_mem = true;
38191 break;
38192 default:
38193 break;
38195 break;
38196 case VOID_FTYPE_PV2SF_V4SF:
38197 case VOID_FTYPE_PV8DI_V8DI:
38198 case VOID_FTYPE_PV4DI_V4DI:
38199 case VOID_FTYPE_PV2DI_V2DI:
38200 case VOID_FTYPE_PCHAR_V32QI:
38201 case VOID_FTYPE_PCHAR_V16QI:
38202 case VOID_FTYPE_PFLOAT_V16SF:
38203 case VOID_FTYPE_PFLOAT_V8SF:
38204 case VOID_FTYPE_PFLOAT_V4SF:
38205 case VOID_FTYPE_PDOUBLE_V8DF:
38206 case VOID_FTYPE_PDOUBLE_V4DF:
38207 case VOID_FTYPE_PDOUBLE_V2DF:
38208 case VOID_FTYPE_PLONGLONG_LONGLONG:
38209 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38210 case VOID_FTYPE_PINT_INT:
38211 nargs = 1;
38212 klass = store;
38213 /* Reserve memory operand for target. */
38214 memory = ARRAY_SIZE (args);
38215 switch (icode)
38217 /* These builtins and instructions require the memory
38218 to be properly aligned. */
38219 case CODE_FOR_avx_movntv4di:
38220 case CODE_FOR_sse2_movntv2di:
38221 case CODE_FOR_avx_movntv8sf:
38222 case CODE_FOR_sse_movntv4sf:
38223 case CODE_FOR_sse4a_vmmovntv4sf:
38224 case CODE_FOR_avx_movntv4df:
38225 case CODE_FOR_sse2_movntv2df:
38226 case CODE_FOR_sse4a_vmmovntv2df:
38227 case CODE_FOR_sse2_movntidi:
38228 case CODE_FOR_sse_movntq:
38229 case CODE_FOR_sse2_movntisi:
38230 case CODE_FOR_avx512f_movntv16sf:
38231 case CODE_FOR_avx512f_movntv8df:
38232 case CODE_FOR_avx512f_movntv8di:
38233 aligned_mem = true;
38234 break;
38235 default:
38236 break;
38238 break;
38239 case V4SF_FTYPE_V4SF_PCV2SF:
38240 case V2DF_FTYPE_V2DF_PCDOUBLE:
38241 nargs = 2;
38242 klass = load;
38243 memory = 1;
38244 break;
38245 case V8SF_FTYPE_PCV8SF_V8SI:
38246 case V4DF_FTYPE_PCV4DF_V4DI:
38247 case V4SF_FTYPE_PCV4SF_V4SI:
38248 case V2DF_FTYPE_PCV2DF_V2DI:
38249 case V8SI_FTYPE_PCV8SI_V8SI:
38250 case V4DI_FTYPE_PCV4DI_V4DI:
38251 case V4SI_FTYPE_PCV4SI_V4SI:
38252 case V2DI_FTYPE_PCV2DI_V2DI:
38253 nargs = 2;
38254 klass = load;
38255 memory = 0;
38256 break;
38257 case VOID_FTYPE_PV8DF_V8DF_QI:
38258 case VOID_FTYPE_PV16SF_V16SF_HI:
38259 case VOID_FTYPE_PV8DI_V8DI_QI:
38260 case VOID_FTYPE_PV4DI_V4DI_QI:
38261 case VOID_FTYPE_PV2DI_V2DI_QI:
38262 case VOID_FTYPE_PV16SI_V16SI_HI:
38263 case VOID_FTYPE_PV8SI_V8SI_QI:
38264 case VOID_FTYPE_PV4SI_V4SI_QI:
38265 switch (icode)
38267 /* These builtins and instructions require the memory
38268 to be properly aligned. */
38269 case CODE_FOR_avx512f_storev16sf_mask:
38270 case CODE_FOR_avx512f_storev16si_mask:
38271 case CODE_FOR_avx512f_storev8df_mask:
38272 case CODE_FOR_avx512f_storev8di_mask:
38273 case CODE_FOR_avx512vl_storev8sf_mask:
38274 case CODE_FOR_avx512vl_storev8si_mask:
38275 case CODE_FOR_avx512vl_storev4df_mask:
38276 case CODE_FOR_avx512vl_storev4di_mask:
38277 case CODE_FOR_avx512vl_storev4sf_mask:
38278 case CODE_FOR_avx512vl_storev4si_mask:
38279 case CODE_FOR_avx512vl_storev2df_mask:
38280 case CODE_FOR_avx512vl_storev2di_mask:
38281 aligned_mem = true;
38282 break;
38283 default:
38284 break;
38286 /* FALLTHRU */
38287 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38288 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38289 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38290 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38291 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38292 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38293 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38294 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38295 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38296 case VOID_FTYPE_PFLOAT_V4SF_QI:
38297 case VOID_FTYPE_PV8SI_V8DI_QI:
38298 case VOID_FTYPE_PV8HI_V8DI_QI:
38299 case VOID_FTYPE_PV16HI_V16SI_HI:
38300 case VOID_FTYPE_PV16QI_V8DI_QI:
38301 case VOID_FTYPE_PV16QI_V16SI_HI:
38302 case VOID_FTYPE_PV4SI_V4DI_QI:
38303 case VOID_FTYPE_PV4SI_V2DI_QI:
38304 case VOID_FTYPE_PV8HI_V4DI_QI:
38305 case VOID_FTYPE_PV8HI_V2DI_QI:
38306 case VOID_FTYPE_PV8HI_V8SI_QI:
38307 case VOID_FTYPE_PV8HI_V4SI_QI:
38308 case VOID_FTYPE_PV16QI_V4DI_QI:
38309 case VOID_FTYPE_PV16QI_V2DI_QI:
38310 case VOID_FTYPE_PV16QI_V8SI_QI:
38311 case VOID_FTYPE_PV16QI_V4SI_QI:
38312 case VOID_FTYPE_PV8HI_V8HI_QI:
38313 case VOID_FTYPE_PV16HI_V16HI_HI:
38314 case VOID_FTYPE_PV32HI_V32HI_SI:
38315 case VOID_FTYPE_PV16QI_V16QI_HI:
38316 case VOID_FTYPE_PV32QI_V32QI_SI:
38317 case VOID_FTYPE_PV64QI_V64QI_DI:
38318 case VOID_FTYPE_PV4DF_V4DF_QI:
38319 case VOID_FTYPE_PV2DF_V2DF_QI:
38320 case VOID_FTYPE_PV8SF_V8SF_QI:
38321 case VOID_FTYPE_PV4SF_V4SF_QI:
38322 nargs = 2;
38323 klass = store;
38324 /* Reserve memory operand for target. */
38325 memory = ARRAY_SIZE (args);
38326 break;
38327 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38328 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38329 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38330 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38331 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38332 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38333 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38334 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38335 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38336 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38337 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38338 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38339 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38340 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38341 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38342 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38343 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38344 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38345 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38346 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38347 nargs = 3;
38348 klass = load;
38349 memory = 0;
38350 switch (icode)
38352 /* These builtins and instructions require the memory
38353 to be properly aligned. */
38354 case CODE_FOR_avx512f_loadv16sf_mask:
38355 case CODE_FOR_avx512f_loadv16si_mask:
38356 case CODE_FOR_avx512f_loadv8df_mask:
38357 case CODE_FOR_avx512f_loadv8di_mask:
38358 case CODE_FOR_avx512vl_loadv8sf_mask:
38359 case CODE_FOR_avx512vl_loadv8si_mask:
38360 case CODE_FOR_avx512vl_loadv4df_mask:
38361 case CODE_FOR_avx512vl_loadv4di_mask:
38362 case CODE_FOR_avx512vl_loadv4sf_mask:
38363 case CODE_FOR_avx512vl_loadv4si_mask:
38364 case CODE_FOR_avx512vl_loadv2df_mask:
38365 case CODE_FOR_avx512vl_loadv2di_mask:
38366 case CODE_FOR_avx512bw_loadv64qi_mask:
38367 case CODE_FOR_avx512vl_loadv32qi_mask:
38368 case CODE_FOR_avx512vl_loadv16qi_mask:
38369 case CODE_FOR_avx512bw_loadv32hi_mask:
38370 case CODE_FOR_avx512vl_loadv16hi_mask:
38371 case CODE_FOR_avx512vl_loadv8hi_mask:
38372 aligned_mem = true;
38373 break;
38374 default:
38375 break;
38377 break;
38378 case VOID_FTYPE_UINT_UINT_UINT:
38379 case VOID_FTYPE_UINT64_UINT_UINT:
38380 case UCHAR_FTYPE_UINT_UINT_UINT:
38381 case UCHAR_FTYPE_UINT64_UINT_UINT:
38382 nargs = 3;
38383 klass = load;
38384 memory = ARRAY_SIZE (args);
38385 last_arg_constant = true;
38386 break;
38387 default:
38388 gcc_unreachable ();
38391 gcc_assert (nargs <= ARRAY_SIZE (args));
38393 if (klass == store)
38395 arg = CALL_EXPR_ARG (exp, 0);
38396 op = expand_normal (arg);
38397 gcc_assert (target == 0);
38398 if (memory)
38400 op = ix86_zero_extend_to_Pmode (op);
38401 target = gen_rtx_MEM (tmode, op);
38402 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38403 on it. Try to improve it using get_pointer_alignment,
38404 and if the special builtin is one that requires strict
38405 mode alignment, also from its GET_MODE_ALIGNMENT.
38406 Failure to do so could lead to ix86_legitimate_combined_insn
38407 rejecting all changes to such insns. */
38408 unsigned int align = get_pointer_alignment (arg);
38409 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38410 align = GET_MODE_ALIGNMENT (tmode);
38411 if (MEM_ALIGN (target) < align)
38412 set_mem_align (target, align);
38414 else
38415 target = force_reg (tmode, op);
38416 arg_adjust = 1;
38418 else
38420 arg_adjust = 0;
38421 if (optimize
38422 || target == 0
38423 || !register_operand (target, tmode)
38424 || GET_MODE (target) != tmode)
38425 target = gen_reg_rtx (tmode);
38428 for (i = 0; i < nargs; i++)
38430 machine_mode mode = insn_p->operand[i + 1].mode;
38431 bool match;
38433 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38434 op = expand_normal (arg);
38435 match = insn_p->operand[i + 1].predicate (op, mode);
38437 if (last_arg_constant && (i + 1) == nargs)
38439 if (!match)
38441 if (icode == CODE_FOR_lwp_lwpvalsi3
38442 || icode == CODE_FOR_lwp_lwpinssi3
38443 || icode == CODE_FOR_lwp_lwpvaldi3
38444 || icode == CODE_FOR_lwp_lwpinsdi3)
38445 error ("the last argument must be a 32-bit immediate");
38446 else
38447 error ("the last argument must be an 8-bit immediate");
38448 return const0_rtx;
38451 else
38453 if (i == memory)
38455 /* This must be the memory operand. */
38456 op = ix86_zero_extend_to_Pmode (op);
38457 op = gen_rtx_MEM (mode, op);
38458 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38459 on it. Try to improve it using get_pointer_alignment,
38460 and if the special builtin is one that requires strict
38461 mode alignment, also from its GET_MODE_ALIGNMENT.
38462 Failure to do so could lead to ix86_legitimate_combined_insn
38463 rejecting all changes to such insns. */
38464 unsigned int align = get_pointer_alignment (arg);
38465 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38466 align = GET_MODE_ALIGNMENT (mode);
38467 if (MEM_ALIGN (op) < align)
38468 set_mem_align (op, align);
38470 else
38472 /* This must be a register.  */
38473 if (VECTOR_MODE_P (mode))
38474 op = safe_vector_operand (op, mode);
38476 op = fixup_modeless_constant (op, mode);
38478 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38479 op = copy_to_mode_reg (mode, op);
38480 else
38482 op = copy_to_reg (op);
38483 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38488 args[i].op = op;
38489 args[i].mode = mode;
38492 switch (nargs)
38494 case 0:
38495 pat = GEN_FCN (icode) (target);
38496 break;
38497 case 1:
38498 pat = GEN_FCN (icode) (target, args[0].op);
38499 break;
38500 case 2:
38501 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38502 break;
38503 case 3:
38504 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38505 break;
38506 default:
38507 gcc_unreachable ();
38510 if (! pat)
38511 return 0;
38512 emit_insn (pat);
38513 return klass == store ? 0 : target;
38516 /* Return the integer constant in ARG. Constrain it to be in the range
38517 of the subparts of VEC_TYPE; issue an error if not. */
38519 static int
38520 get_element_number (tree vec_type, tree arg)
38522 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38524 if (!tree_fits_uhwi_p (arg)
38525 || (elt = tree_to_uhwi (arg), elt > max))
38527 error ("selector must be an integer constant in the range 0..%wi", max);
38528 return 0;
38531 return elt;
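/* For example, for a V4SF vector TYPE_VECTOR_SUBPARTS is 4, so only
   selectors 0..3 are accepted; anything else is diagnosed above and
   element 0 is used instead.  */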
38534 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38535 ix86_expand_vector_init. We DO have language-level syntax for this, in
38536 the form of (type){ init-list }. Except that since we can't place emms
38537 instructions from inside the compiler, we can't allow the use of MMX
38538 registers unless the user explicitly asks for it. So we do *not* define
38539 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38540 we have builtins invoked by mmintrin.h that give us license to emit
38541 these sorts of instructions. */
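/* As an illustrative (not verbatim) example, mmintrin.h is expected to
   implement _mm_set_pi32 along the lines of
     (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
   which reaches ix86_expand_vec_init_builtin below via
   IX86_BUILTIN_VEC_INIT_V2SI.  */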
38543 static rtx
38544 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38546 machine_mode tmode = TYPE_MODE (type);
38547 machine_mode inner_mode = GET_MODE_INNER (tmode);
38548 int i, n_elt = GET_MODE_NUNITS (tmode);
38549 rtvec v = rtvec_alloc (n_elt);
38551 gcc_assert (VECTOR_MODE_P (tmode));
38552 gcc_assert (call_expr_nargs (exp) == n_elt);
38554 for (i = 0; i < n_elt; ++i)
38556 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38557 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38560 if (!target || !register_operand (target, tmode))
38561 target = gen_reg_rtx (tmode);
38563 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38564 return target;
38567 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38568 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38569 had a language-level syntax for referencing vector elements. */
38571 static rtx
38572 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38574 machine_mode tmode, mode0;
38575 tree arg0, arg1;
38576 int elt;
38577 rtx op0;
38579 arg0 = CALL_EXPR_ARG (exp, 0);
38580 arg1 = CALL_EXPR_ARG (exp, 1);
38582 op0 = expand_normal (arg0);
38583 elt = get_element_number (TREE_TYPE (arg0), arg1);
38585 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38586 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38587 gcc_assert (VECTOR_MODE_P (mode0));
38589 op0 = force_reg (mode0, op0);
38591 if (optimize || !target || !register_operand (target, tmode))
38592 target = gen_reg_rtx (tmode);
38594 ix86_expand_vector_extract (true, target, op0, elt);
38596 return target;
38599 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38600 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38601 a language-level syntax for referencing vector elements. */
38603 static rtx
38604 ix86_expand_vec_set_builtin (tree exp)
38606 machine_mode tmode, mode1;
38607 tree arg0, arg1, arg2;
38608 int elt;
38609 rtx op0, op1, target;
38611 arg0 = CALL_EXPR_ARG (exp, 0);
38612 arg1 = CALL_EXPR_ARG (exp, 1);
38613 arg2 = CALL_EXPR_ARG (exp, 2);
38615 tmode = TYPE_MODE (TREE_TYPE (arg0));
38616 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38617 gcc_assert (VECTOR_MODE_P (tmode));
38619 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38620 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38621 elt = get_element_number (TREE_TYPE (arg0), arg2);
38623 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38624 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38626 op0 = force_reg (tmode, op0);
38627 op1 = force_reg (mode1, op1);
38629 /* OP0 is the source of these builtin functions and shouldn't be
38630 modified. Create a copy, use it and return it as target. */
38631 target = gen_reg_rtx (tmode);
38632 emit_move_insn (target, op0);
38633 ix86_expand_vector_set (true, target, op1, elt);
38635 return target;
38638 /* Emit conditional move of SRC to DST with condition
38639 OP1 CODE OP2. */
38640 static void
38641 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38643 rtx t;
38645 if (TARGET_CMOVE)
38647 t = ix86_expand_compare (code, op1, op2);
38648 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38649 src, dst)));
38651 else
38653 rtx_code_label *nomove = gen_label_rtx ();
38654 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38655 const0_rtx, GET_MODE (op1), 1, nomove);
38656 emit_move_insn (dst, src);
38657 emit_label (nomove);
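/* Either way the effect is "if (OP1 CODE OP2) DST = SRC;": the non-CMOV
   path simply branches around the move using the reversed condition
   instead of emitting a conditional move.  */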
38661 /* Choose max of DST and SRC and put it to DST. */
38662 static void
38663 ix86_emit_move_max (rtx dst, rtx src)
38665 ix86_emit_cmove (dst, src, LTU, dst, src);
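/* I.e. DST = MAX (DST, SRC) as unsigned values: SRC is copied only when
   DST compares below it (LTU).  */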
38668 /* Expand an expression EXP that calls a built-in function,
38669 with result going to TARGET if that's convenient
38670 (and in mode MODE if that's convenient).
38671 SUBTARGET may be used as the target for computing one of EXP's operands.
38672 IGNORE is nonzero if the value is to be ignored. */
38674 static rtx
38675 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38676 machine_mode mode, int ignore)
38678 const struct builtin_description *d;
38679 size_t i;
38680 enum insn_code icode;
38681 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38682 tree arg0, arg1, arg2, arg3, arg4;
38683 rtx op0, op1, op2, op3, op4, pat, insn;
38684 machine_mode mode0, mode1, mode2, mode3, mode4;
38685 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38687 /* For CPU builtins that can be folded, fold first and expand the fold. */
38688 switch (fcode)
38690 case IX86_BUILTIN_CPU_INIT:
38692 /* Make it call __cpu_indicator_init in libgcc. */
38693 tree call_expr, fndecl, type;
38694 type = build_function_type_list (integer_type_node, NULL_TREE);
38695 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38696 call_expr = build_call_expr (fndecl, 0);
38697 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38699 case IX86_BUILTIN_CPU_IS:
38700 case IX86_BUILTIN_CPU_SUPPORTS:
38702 tree arg0 = CALL_EXPR_ARG (exp, 0);
38703 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38704 gcc_assert (fold_expr != NULL_TREE);
38705 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38709 /* Determine whether the builtin function is available under the current ISA.
38710 Originally the builtin was not created if it wasn't applicable to the
38711 current ISA based on the command line switches. With function specific
38712 options, we need to check in the context of the function making the call
38713 whether it is supported. */
38714 if (ix86_builtins_isa[fcode].isa
38715 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38717 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38718 NULL, (enum fpmath_unit) 0, false);
38720 if (!opts)
38721 error ("%qE needs unknown isa option", fndecl);
38722 else
38724 gcc_assert (opts != NULL);
38725 error ("%qE needs isa option %s", fndecl, opts);
38726 free (opts);
38728 return const0_rtx;
38731 switch (fcode)
38733 case IX86_BUILTIN_BNDMK:
38734 if (!target
38735 || GET_MODE (target) != BNDmode
38736 || !register_operand (target, BNDmode))
38737 target = gen_reg_rtx (BNDmode);
38739 arg0 = CALL_EXPR_ARG (exp, 0);
38740 arg1 = CALL_EXPR_ARG (exp, 1);
38742 op0 = expand_normal (arg0);
38743 op1 = expand_normal (arg1);
38745 if (!register_operand (op0, Pmode))
38746 op0 = ix86_zero_extend_to_Pmode (op0);
38747 if (!register_operand (op1, Pmode))
38748 op1 = ix86_zero_extend_to_Pmode (op1);
38750 /* Builtin arg1 is size of block but instruction op1 should
38751 be (size - 1). */
38752 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38753 NULL_RTX, 1, OPTAB_DIRECT);
38755 emit_insn (BNDmode == BND64mode
38756 ? gen_bnd64_mk (target, op0, op1)
38757 : gen_bnd32_mk (target, op0, op1));
38758 return target;
38760 case IX86_BUILTIN_BNDSTX:
38761 arg0 = CALL_EXPR_ARG (exp, 0);
38762 arg1 = CALL_EXPR_ARG (exp, 1);
38763 arg2 = CALL_EXPR_ARG (exp, 2);
38765 op0 = expand_normal (arg0);
38766 op1 = expand_normal (arg1);
38767 op2 = expand_normal (arg2);
38769 if (!register_operand (op0, Pmode))
38770 op0 = ix86_zero_extend_to_Pmode (op0);
38771 if (!register_operand (op1, BNDmode))
38772 op1 = copy_to_mode_reg (BNDmode, op1);
38773 if (!register_operand (op2, Pmode))
38774 op2 = ix86_zero_extend_to_Pmode (op2);
38776 emit_insn (BNDmode == BND64mode
38777 ? gen_bnd64_stx (op2, op0, op1)
38778 : gen_bnd32_stx (op2, op0, op1));
38779 return 0;
38781 case IX86_BUILTIN_BNDLDX:
38782 if (!target
38783 || GET_MODE (target) != BNDmode
38784 || !register_operand (target, BNDmode))
38785 target = gen_reg_rtx (BNDmode);
38787 arg0 = CALL_EXPR_ARG (exp, 0);
38788 arg1 = CALL_EXPR_ARG (exp, 1);
38790 op0 = expand_normal (arg0);
38791 op1 = expand_normal (arg1);
38793 if (!register_operand (op0, Pmode))
38794 op0 = ix86_zero_extend_to_Pmode (op0);
38795 if (!register_operand (op1, Pmode))
38796 op1 = ix86_zero_extend_to_Pmode (op1);
38798 emit_insn (BNDmode == BND64mode
38799 ? gen_bnd64_ldx (target, op0, op1)
38800 : gen_bnd32_ldx (target, op0, op1));
38801 return target;
38803 case IX86_BUILTIN_BNDCL:
38804 arg0 = CALL_EXPR_ARG (exp, 0);
38805 arg1 = CALL_EXPR_ARG (exp, 1);
38807 op0 = expand_normal (arg0);
38808 op1 = expand_normal (arg1);
38810 if (!register_operand (op0, Pmode))
38811 op0 = ix86_zero_extend_to_Pmode (op0);
38812 if (!register_operand (op1, BNDmode))
38813 op1 = copy_to_mode_reg (BNDmode, op1);
38815 emit_insn (BNDmode == BND64mode
38816 ? gen_bnd64_cl (op1, op0)
38817 : gen_bnd32_cl (op1, op0));
38818 return 0;
38820 case IX86_BUILTIN_BNDCU:
38821 arg0 = CALL_EXPR_ARG (exp, 0);
38822 arg1 = CALL_EXPR_ARG (exp, 1);
38824 op0 = expand_normal (arg0);
38825 op1 = expand_normal (arg1);
38827 if (!register_operand (op0, Pmode))
38828 op0 = ix86_zero_extend_to_Pmode (op0);
38829 if (!register_operand (op1, BNDmode))
38830 op1 = copy_to_mode_reg (BNDmode, op1);
38832 emit_insn (BNDmode == BND64mode
38833 ? gen_bnd64_cu (op1, op0)
38834 : gen_bnd32_cu (op1, op0));
38835 return 0;
38837 case IX86_BUILTIN_BNDRET:
38838 arg0 = CALL_EXPR_ARG (exp, 0);
38839 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38840 target = chkp_get_rtl_bounds (arg0);
38842 /* If no bounds were specified for the returned value,
38843 then use INIT bounds.  This usually happens when
38844 some built-in function is expanded. */
38845 if (!target)
38847 rtx t1 = gen_reg_rtx (Pmode);
38848 rtx t2 = gen_reg_rtx (Pmode);
38849 target = gen_reg_rtx (BNDmode);
38850 emit_move_insn (t1, const0_rtx);
38851 emit_move_insn (t2, constm1_rtx);
38852 emit_insn (BNDmode == BND64mode
38853 ? gen_bnd64_mk (target, t1, t2)
38854 : gen_bnd32_mk (target, t1, t2));
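/* With t1 = 0 and t2 = -1 these are the INIT bounds [0, ~0], which cover
   the whole address space and never fail a bounds check.  */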
38857 gcc_assert (target && REG_P (target));
38858 return target;
38860 case IX86_BUILTIN_BNDNARROW:
38862 rtx m1, m1h1, m1h2, lb, ub, t1;
38864 /* Return value and lb. */
38865 arg0 = CALL_EXPR_ARG (exp, 0);
38866 /* Bounds. */
38867 arg1 = CALL_EXPR_ARG (exp, 1);
38868 /* Size. */
38869 arg2 = CALL_EXPR_ARG (exp, 2);
38871 lb = expand_normal (arg0);
38872 op1 = expand_normal (arg1);
38873 op2 = expand_normal (arg2);
38875 /* Size was passed but we need to use (size - 1) as for bndmk. */
38876 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38877 NULL_RTX, 1, OPTAB_DIRECT);
38879 /* Add LB to size and invert to get UB. */
38880 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38881 op2, 1, OPTAB_DIRECT);
38882 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
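/* So UB = ~(LB + SIZE - 1); the upper bound is kept in one's complement
   form, matching the in-memory BND representation used below.  */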
38884 if (!register_operand (lb, Pmode))
38885 lb = ix86_zero_extend_to_Pmode (lb);
38886 if (!register_operand (ub, Pmode))
38887 ub = ix86_zero_extend_to_Pmode (ub);
38889 /* We need to move bounds to memory before any computations. */
38890 if (MEM_P (op1))
38891 m1 = op1;
38892 else
38894 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38895 emit_move_insn (m1, op1);
38898 /* Generate mem expression to be used for access to LB and UB. */
38899 m1h1 = adjust_address (m1, Pmode, 0);
38900 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38902 t1 = gen_reg_rtx (Pmode);
38904 /* Compute LB. */
38905 emit_move_insn (t1, m1h1);
38906 ix86_emit_move_max (t1, lb);
38907 emit_move_insn (m1h1, t1);
38909 /* Compute UB. UB is stored in 1's complement form. Therefore
38910 we also use max here. */
38911 emit_move_insn (t1, m1h2);
38912 ix86_emit_move_max (t1, ub);
38913 emit_move_insn (m1h2, t1);
38915 op2 = gen_reg_rtx (BNDmode);
38916 emit_move_insn (op2, m1);
38918 return chkp_join_splitted_slot (lb, op2);
38921 case IX86_BUILTIN_BNDINT:
38923 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38925 if (!target
38926 || GET_MODE (target) != BNDmode
38927 || !register_operand (target, BNDmode))
38928 target = gen_reg_rtx (BNDmode);
38930 arg0 = CALL_EXPR_ARG (exp, 0);
38931 arg1 = CALL_EXPR_ARG (exp, 1);
38933 op0 = expand_normal (arg0);
38934 op1 = expand_normal (arg1);
38936 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38937 rh1 = adjust_address (res, Pmode, 0);
38938 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38940 /* Put first bounds to temporaries. */
38941 lb1 = gen_reg_rtx (Pmode);
38942 ub1 = gen_reg_rtx (Pmode);
38943 if (MEM_P (op0))
38945 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38946 emit_move_insn (ub1, adjust_address (op0, Pmode,
38947 GET_MODE_SIZE (Pmode)));
38949 else
38951 emit_move_insn (res, op0);
38952 emit_move_insn (lb1, rh1);
38953 emit_move_insn (ub1, rh2);
38956 /* Put second bounds to temporaries. */
38957 lb2 = gen_reg_rtx (Pmode);
38958 ub2 = gen_reg_rtx (Pmode);
38959 if (MEM_P (op1))
38961 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38962 emit_move_insn (ub2, adjust_address (op1, Pmode,
38963 GET_MODE_SIZE (Pmode)));
38965 else
38967 emit_move_insn (res, op1);
38968 emit_move_insn (lb2, rh1);
38969 emit_move_insn (ub2, rh2);
38972 /* Compute LB. */
38973 ix86_emit_move_max (lb1, lb2);
38974 emit_move_insn (rh1, lb1);
38976 /* Compute UB. UB is stored in 1's complement form. Therefore
38977 we also use max here. */
38978 ix86_emit_move_max (ub1, ub2);
38979 emit_move_insn (rh2, ub1);
38981 emit_move_insn (target, res);
38983 return target;
38986 case IX86_BUILTIN_SIZEOF:
38988 tree name;
38989 rtx symbol;
38991 if (!target
38992 || GET_MODE (target) != Pmode
38993 || !register_operand (target, Pmode))
38994 target = gen_reg_rtx (Pmode);
38996 arg0 = CALL_EXPR_ARG (exp, 0);
38997 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38999 name = DECL_ASSEMBLER_NAME (arg0);
39000 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
39002 emit_insn (Pmode == SImode
39003 ? gen_move_size_reloc_si (target, symbol)
39004 : gen_move_size_reloc_di (target, symbol));
39006 return target;
39009 case IX86_BUILTIN_BNDLOWER:
39011 rtx mem, hmem;
39013 if (!target
39014 || GET_MODE (target) != Pmode
39015 || !register_operand (target, Pmode))
39016 target = gen_reg_rtx (Pmode);
39018 arg0 = CALL_EXPR_ARG (exp, 0);
39019 op0 = expand_normal (arg0);
39021 /* We need to move bounds to memory first. */
39022 if (MEM_P (op0))
39023 mem = op0;
39024 else
39026 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39027 emit_move_insn (mem, op0);
39030 /* Generate mem expression to access LB and load it. */
39031 hmem = adjust_address (mem, Pmode, 0);
39032 emit_move_insn (target, hmem);
39034 return target;
39037 case IX86_BUILTIN_BNDUPPER:
39039 rtx mem, hmem, res;
39041 if (!target
39042 || GET_MODE (target) != Pmode
39043 || !register_operand (target, Pmode))
39044 target = gen_reg_rtx (Pmode);
39046 arg0 = CALL_EXPR_ARG (exp, 0);
39047 op0 = expand_normal (arg0);
39049 /* We need to move bounds to memory first. */
39050 if (MEM_P (op0))
39051 mem = op0;
39052 else
39054 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39055 emit_move_insn (mem, op0);
39058 /* Generate mem expression to access UB. */
39059 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
39061 /* We need to invert all bits of UB. */
39062 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
39064 if (res != target)
39065 emit_move_insn (target, res);
39067 return target;
39070 case IX86_BUILTIN_MASKMOVQ:
39071 case IX86_BUILTIN_MASKMOVDQU:
39072 icode = (fcode == IX86_BUILTIN_MASKMOVQ
39073 ? CODE_FOR_mmx_maskmovq
39074 : CODE_FOR_sse2_maskmovdqu);
39075 /* Note the arg order is different from the operand order. */
39076 arg1 = CALL_EXPR_ARG (exp, 0);
39077 arg2 = CALL_EXPR_ARG (exp, 1);
39078 arg0 = CALL_EXPR_ARG (exp, 2);
39079 op0 = expand_normal (arg0);
39080 op1 = expand_normal (arg1);
39081 op2 = expand_normal (arg2);
39082 mode0 = insn_data[icode].operand[0].mode;
39083 mode1 = insn_data[icode].operand[1].mode;
39084 mode2 = insn_data[icode].operand[2].mode;
39086 op0 = ix86_zero_extend_to_Pmode (op0);
39087 op0 = gen_rtx_MEM (mode1, op0);
39089 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39090 op0 = copy_to_mode_reg (mode0, op0);
39091 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39092 op1 = copy_to_mode_reg (mode1, op1);
39093 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39094 op2 = copy_to_mode_reg (mode2, op2);
39095 pat = GEN_FCN (icode) (op0, op1, op2);
39096 if (! pat)
39097 return 0;
39098 emit_insn (pat);
39099 return 0;
39101 case IX86_BUILTIN_LDMXCSR:
39102 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
39103 target = assign_386_stack_local (SImode, SLOT_TEMP);
39104 emit_move_insn (target, op0);
39105 emit_insn (gen_sse_ldmxcsr (target));
39106 return 0;
39108 case IX86_BUILTIN_STMXCSR:
39109 target = assign_386_stack_local (SImode, SLOT_TEMP);
39110 emit_insn (gen_sse_stmxcsr (target));
39111 return copy_to_mode_reg (SImode, target);
39113 case IX86_BUILTIN_CLFLUSH:
39114 arg0 = CALL_EXPR_ARG (exp, 0);
39115 op0 = expand_normal (arg0);
39116 icode = CODE_FOR_sse2_clflush;
39117 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39118 op0 = ix86_zero_extend_to_Pmode (op0);
39120 emit_insn (gen_sse2_clflush (op0));
39121 return 0;
39123 case IX86_BUILTIN_CLWB:
39124 arg0 = CALL_EXPR_ARG (exp, 0);
39125 op0 = expand_normal (arg0);
39126 icode = CODE_FOR_clwb;
39127 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39128 op0 = ix86_zero_extend_to_Pmode (op0);
39130 emit_insn (gen_clwb (op0));
39131 return 0;
39133 case IX86_BUILTIN_CLFLUSHOPT:
39134 arg0 = CALL_EXPR_ARG (exp, 0);
39135 op0 = expand_normal (arg0);
39136 icode = CODE_FOR_clflushopt;
39137 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39138 op0 = ix86_zero_extend_to_Pmode (op0);
39140 emit_insn (gen_clflushopt (op0));
39141 return 0;
39143 case IX86_BUILTIN_MONITOR:
39144 case IX86_BUILTIN_MONITORX:
39145 arg0 = CALL_EXPR_ARG (exp, 0);
39146 arg1 = CALL_EXPR_ARG (exp, 1);
39147 arg2 = CALL_EXPR_ARG (exp, 2);
39148 op0 = expand_normal (arg0);
39149 op1 = expand_normal (arg1);
39150 op2 = expand_normal (arg2);
39151 if (!REG_P (op0))
39152 op0 = ix86_zero_extend_to_Pmode (op0);
39153 if (!REG_P (op1))
39154 op1 = copy_to_mode_reg (SImode, op1);
39155 if (!REG_P (op2))
39156 op2 = copy_to_mode_reg (SImode, op2);
39158 emit_insn (fcode == IX86_BUILTIN_MONITOR
39159 ? ix86_gen_monitor (op0, op1, op2)
39160 : ix86_gen_monitorx (op0, op1, op2));
39161 return 0;
39163 case IX86_BUILTIN_MWAIT:
39164 arg0 = CALL_EXPR_ARG (exp, 0);
39165 arg1 = CALL_EXPR_ARG (exp, 1);
39166 op0 = expand_normal (arg0);
39167 op1 = expand_normal (arg1);
39168 if (!REG_P (op0))
39169 op0 = copy_to_mode_reg (SImode, op0);
39170 if (!REG_P (op1))
39171 op1 = copy_to_mode_reg (SImode, op1);
39172 emit_insn (gen_sse3_mwait (op0, op1));
39173 return 0;
39175 case IX86_BUILTIN_MWAITX:
39176 arg0 = CALL_EXPR_ARG (exp, 0);
39177 arg1 = CALL_EXPR_ARG (exp, 1);
39178 arg2 = CALL_EXPR_ARG (exp, 2);
39179 op0 = expand_normal (arg0);
39180 op1 = expand_normal (arg1);
39181 op2 = expand_normal (arg2);
39182 if (!REG_P (op0))
39183 op0 = copy_to_mode_reg (SImode, op0);
39184 if (!REG_P (op1))
39185 op1 = copy_to_mode_reg (SImode, op1);
39186 if (!REG_P (op2))
39187 op2 = copy_to_mode_reg (SImode, op2);
39188 emit_insn (gen_mwaitx (op0, op1, op2));
39189 return 0;
39191 case IX86_BUILTIN_VEC_INIT_V2SI:
39192 case IX86_BUILTIN_VEC_INIT_V4HI:
39193 case IX86_BUILTIN_VEC_INIT_V8QI:
39194 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39196 case IX86_BUILTIN_VEC_EXT_V2DF:
39197 case IX86_BUILTIN_VEC_EXT_V2DI:
39198 case IX86_BUILTIN_VEC_EXT_V4SF:
39199 case IX86_BUILTIN_VEC_EXT_V4SI:
39200 case IX86_BUILTIN_VEC_EXT_V8HI:
39201 case IX86_BUILTIN_VEC_EXT_V2SI:
39202 case IX86_BUILTIN_VEC_EXT_V4HI:
39203 case IX86_BUILTIN_VEC_EXT_V16QI:
39204 return ix86_expand_vec_ext_builtin (exp, target);
39206 case IX86_BUILTIN_VEC_SET_V2DI:
39207 case IX86_BUILTIN_VEC_SET_V4SF:
39208 case IX86_BUILTIN_VEC_SET_V4SI:
39209 case IX86_BUILTIN_VEC_SET_V8HI:
39210 case IX86_BUILTIN_VEC_SET_V4HI:
39211 case IX86_BUILTIN_VEC_SET_V16QI:
39212 return ix86_expand_vec_set_builtin (exp);
39214 case IX86_BUILTIN_INFQ:
39215 case IX86_BUILTIN_HUGE_VALQ:
39217 REAL_VALUE_TYPE inf;
39218 rtx tmp;
39220 real_inf (&inf);
39221 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39223 tmp = validize_mem (force_const_mem (mode, tmp));
39225 if (target == 0)
39226 target = gen_reg_rtx (mode);
39228 emit_move_insn (target, tmp);
39229 return target;
39232 case IX86_BUILTIN_RDPMC:
39233 case IX86_BUILTIN_RDTSC:
39234 case IX86_BUILTIN_RDTSCP:
39236 op0 = gen_reg_rtx (DImode);
39237 op1 = gen_reg_rtx (DImode);
39239 if (fcode == IX86_BUILTIN_RDPMC)
39241 arg0 = CALL_EXPR_ARG (exp, 0);
39242 op2 = expand_normal (arg0);
39243 if (!register_operand (op2, SImode))
39244 op2 = copy_to_mode_reg (SImode, op2);
39246 insn = (TARGET_64BIT
39247 ? gen_rdpmc_rex64 (op0, op1, op2)
39248 : gen_rdpmc (op0, op2));
39249 emit_insn (insn);
39251 else if (fcode == IX86_BUILTIN_RDTSC)
39253 insn = (TARGET_64BIT
39254 ? gen_rdtsc_rex64 (op0, op1)
39255 : gen_rdtsc (op0));
39256 emit_insn (insn);
39258 else
39260 op2 = gen_reg_rtx (SImode);
39262 insn = (TARGET_64BIT
39263 ? gen_rdtscp_rex64 (op0, op1, op2)
39264 : gen_rdtscp (op0, op2));
39265 emit_insn (insn);
39267 arg0 = CALL_EXPR_ARG (exp, 0);
39268 op4 = expand_normal (arg0);
39269 if (!address_operand (op4, VOIDmode))
39271 op4 = convert_memory_address (Pmode, op4);
39272 op4 = copy_addr_to_reg (op4);
39274 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39277 if (target == 0)
39279 /* mode is VOIDmode if __builtin_rd* has been called
39280 without lhs. */
39281 if (mode == VOIDmode)
39282 return target;
39283 target = gen_reg_rtx (mode);
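/* On 64-bit targets the *_rex64 patterns return the low and high 32 bits
   of the counter in two separate registers; shift and OR them back
   together into a single DImode value.  */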
39286 if (TARGET_64BIT)
39288 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39289 op1, 1, OPTAB_DIRECT);
39290 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39291 op0, 1, OPTAB_DIRECT);
39294 emit_move_insn (target, op0);
39295 return target;
39297 case IX86_BUILTIN_FXSAVE:
39298 case IX86_BUILTIN_FXRSTOR:
39299 case IX86_BUILTIN_FXSAVE64:
39300 case IX86_BUILTIN_FXRSTOR64:
39301 case IX86_BUILTIN_FNSTENV:
39302 case IX86_BUILTIN_FLDENV:
39303 mode0 = BLKmode;
39304 switch (fcode)
39306 case IX86_BUILTIN_FXSAVE:
39307 icode = CODE_FOR_fxsave;
39308 break;
39309 case IX86_BUILTIN_FXRSTOR:
39310 icode = CODE_FOR_fxrstor;
39311 break;
39312 case IX86_BUILTIN_FXSAVE64:
39313 icode = CODE_FOR_fxsave64;
39314 break;
39315 case IX86_BUILTIN_FXRSTOR64:
39316 icode = CODE_FOR_fxrstor64;
39317 break;
39318 case IX86_BUILTIN_FNSTENV:
39319 icode = CODE_FOR_fnstenv;
39320 break;
39321 case IX86_BUILTIN_FLDENV:
39322 icode = CODE_FOR_fldenv;
39323 break;
39324 default:
39325 gcc_unreachable ();
39328 arg0 = CALL_EXPR_ARG (exp, 0);
39329 op0 = expand_normal (arg0);
39331 if (!address_operand (op0, VOIDmode))
39333 op0 = convert_memory_address (Pmode, op0);
39334 op0 = copy_addr_to_reg (op0);
39336 op0 = gen_rtx_MEM (mode0, op0);
39338 pat = GEN_FCN (icode) (op0);
39339 if (pat)
39340 emit_insn (pat);
39341 return 0;
39343 case IX86_BUILTIN_XSAVE:
39344 case IX86_BUILTIN_XRSTOR:
39345 case IX86_BUILTIN_XSAVE64:
39346 case IX86_BUILTIN_XRSTOR64:
39347 case IX86_BUILTIN_XSAVEOPT:
39348 case IX86_BUILTIN_XSAVEOPT64:
39349 case IX86_BUILTIN_XSAVES:
39350 case IX86_BUILTIN_XRSTORS:
39351 case IX86_BUILTIN_XSAVES64:
39352 case IX86_BUILTIN_XRSTORS64:
39353 case IX86_BUILTIN_XSAVEC:
39354 case IX86_BUILTIN_XSAVEC64:
39355 arg0 = CALL_EXPR_ARG (exp, 0);
39356 arg1 = CALL_EXPR_ARG (exp, 1);
39357 op0 = expand_normal (arg0);
39358 op1 = expand_normal (arg1);
39360 if (!address_operand (op0, VOIDmode))
39362 op0 = convert_memory_address (Pmode, op0);
39363 op0 = copy_addr_to_reg (op0);
39365 op0 = gen_rtx_MEM (BLKmode, op0);
39367 op1 = force_reg (DImode, op1);
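/* The xsave family takes its 64-bit feature mask in EDX:EAX, so on
   64-bit targets split OP1 into its high and low 32-bit halves and pass
   them as two SImode operands.  */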
39369 if (TARGET_64BIT)
39371 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39372 NULL, 1, OPTAB_DIRECT);
39373 switch (fcode)
39375 case IX86_BUILTIN_XSAVE:
39376 icode = CODE_FOR_xsave_rex64;
39377 break;
39378 case IX86_BUILTIN_XRSTOR:
39379 icode = CODE_FOR_xrstor_rex64;
39380 break;
39381 case IX86_BUILTIN_XSAVE64:
39382 icode = CODE_FOR_xsave64;
39383 break;
39384 case IX86_BUILTIN_XRSTOR64:
39385 icode = CODE_FOR_xrstor64;
39386 break;
39387 case IX86_BUILTIN_XSAVEOPT:
39388 icode = CODE_FOR_xsaveopt_rex64;
39389 break;
39390 case IX86_BUILTIN_XSAVEOPT64:
39391 icode = CODE_FOR_xsaveopt64;
39392 break;
39393 case IX86_BUILTIN_XSAVES:
39394 icode = CODE_FOR_xsaves_rex64;
39395 break;
39396 case IX86_BUILTIN_XRSTORS:
39397 icode = CODE_FOR_xrstors_rex64;
39398 break;
39399 case IX86_BUILTIN_XSAVES64:
39400 icode = CODE_FOR_xsaves64;
39401 break;
39402 case IX86_BUILTIN_XRSTORS64:
39403 icode = CODE_FOR_xrstors64;
39404 break;
39405 case IX86_BUILTIN_XSAVEC:
39406 icode = CODE_FOR_xsavec_rex64;
39407 break;
39408 case IX86_BUILTIN_XSAVEC64:
39409 icode = CODE_FOR_xsavec64;
39410 break;
39411 default:
39412 gcc_unreachable ();
39415 op2 = gen_lowpart (SImode, op2);
39416 op1 = gen_lowpart (SImode, op1);
39417 pat = GEN_FCN (icode) (op0, op1, op2);
39419 else
39421 switch (fcode)
39423 case IX86_BUILTIN_XSAVE:
39424 icode = CODE_FOR_xsave;
39425 break;
39426 case IX86_BUILTIN_XRSTOR:
39427 icode = CODE_FOR_xrstor;
39428 break;
39429 case IX86_BUILTIN_XSAVEOPT:
39430 icode = CODE_FOR_xsaveopt;
39431 break;
39432 case IX86_BUILTIN_XSAVES:
39433 icode = CODE_FOR_xsaves;
39434 break;
39435 case IX86_BUILTIN_XRSTORS:
39436 icode = CODE_FOR_xrstors;
39437 break;
39438 case IX86_BUILTIN_XSAVEC:
39439 icode = CODE_FOR_xsavec;
39440 break;
39441 default:
39442 gcc_unreachable ();
39444 pat = GEN_FCN (icode) (op0, op1);
39447 if (pat)
39448 emit_insn (pat);
39449 return 0;
39451 case IX86_BUILTIN_LLWPCB:
39452 arg0 = CALL_EXPR_ARG (exp, 0);
39453 op0 = expand_normal (arg0);
39454 icode = CODE_FOR_lwp_llwpcb;
39455 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39456 op0 = ix86_zero_extend_to_Pmode (op0);
39457 emit_insn (gen_lwp_llwpcb (op0));
39458 return 0;
39460 case IX86_BUILTIN_SLWPCB:
39461 icode = CODE_FOR_lwp_slwpcb;
39462 if (!target
39463 || !insn_data[icode].operand[0].predicate (target, Pmode))
39464 target = gen_reg_rtx (Pmode);
39465 emit_insn (gen_lwp_slwpcb (target));
39466 return target;
39468 case IX86_BUILTIN_BEXTRI32:
39469 case IX86_BUILTIN_BEXTRI64:
39470 arg0 = CALL_EXPR_ARG (exp, 0);
39471 arg1 = CALL_EXPR_ARG (exp, 1);
39472 op0 = expand_normal (arg0);
39473 op1 = expand_normal (arg1);
39474 icode = (fcode == IX86_BUILTIN_BEXTRI32
39475 ? CODE_FOR_tbm_bextri_si
39476 : CODE_FOR_tbm_bextri_di);
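/* The single immediate operand packs the bit-field length in bits 15:8
   and the starting bit position in bits 7:0; it is split below into the
   two separate operands the bextri pattern expects.  */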
39477 if (!CONST_INT_P (op1))
39479 error ("last argument must be an immediate");
39480 return const0_rtx;
39482 else
39484 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39485 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39486 op1 = GEN_INT (length);
39487 op2 = GEN_INT (lsb_index);
39488 pat = GEN_FCN (icode) (target, op0, op1, op2);
39489 if (pat)
39490 emit_insn (pat);
39491 return target;
39494 case IX86_BUILTIN_RDRAND16_STEP:
39495 icode = CODE_FOR_rdrandhi_1;
39496 mode0 = HImode;
39497 goto rdrand_step;
39499 case IX86_BUILTIN_RDRAND32_STEP:
39500 icode = CODE_FOR_rdrandsi_1;
39501 mode0 = SImode;
39502 goto rdrand_step;
39504 case IX86_BUILTIN_RDRAND64_STEP:
39505 icode = CODE_FOR_rdranddi_1;
39506 mode0 = DImode;
39508 rdrand_step:
39509 op0 = gen_reg_rtx (mode0);
39510 emit_insn (GEN_FCN (icode) (op0));
39512 arg0 = CALL_EXPR_ARG (exp, 0);
39513 op1 = expand_normal (arg0);
39514 if (!address_operand (op1, VOIDmode))
39516 op1 = convert_memory_address (Pmode, op1);
39517 op1 = copy_addr_to_reg (op1);
39519 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39521 op1 = gen_reg_rtx (SImode);
39522 emit_move_insn (op1, CONST1_RTX (SImode));
39524 /* Emit SImode conditional move. */
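/* This relies on the rdrand semantics: on failure the destination
   register is cleared and CF is 0, so returning the (zero) value in OP2
   yields 0, while on success (CF set) the constant 1 in OP1 is returned
   instead.  */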
39525 if (mode0 == HImode)
39527 op2 = gen_reg_rtx (SImode);
39528 emit_insn (gen_zero_extendhisi2 (op2, op0));
39530 else if (mode0 == SImode)
39531 op2 = op0;
39532 else
39533 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39535 if (target == 0
39536 || !register_operand (target, SImode))
39537 target = gen_reg_rtx (SImode);
39539 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39540 const0_rtx);
39541 emit_insn (gen_rtx_SET (target,
39542 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39543 return target;
39545 case IX86_BUILTIN_RDSEED16_STEP:
39546 icode = CODE_FOR_rdseedhi_1;
39547 mode0 = HImode;
39548 goto rdseed_step;
39550 case IX86_BUILTIN_RDSEED32_STEP:
39551 icode = CODE_FOR_rdseedsi_1;
39552 mode0 = SImode;
39553 goto rdseed_step;
39555 case IX86_BUILTIN_RDSEED64_STEP:
39556 icode = CODE_FOR_rdseeddi_1;
39557 mode0 = DImode;
39559 rdseed_step:
39560 op0 = gen_reg_rtx (mode0);
39561 emit_insn (GEN_FCN (icode) (op0));
39563 arg0 = CALL_EXPR_ARG (exp, 0);
39564 op1 = expand_normal (arg0);
39565 if (!address_operand (op1, VOIDmode))
39567 op1 = convert_memory_address (Pmode, op1);
39568 op1 = copy_addr_to_reg (op1);
39570 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39572 op2 = gen_reg_rtx (QImode);
39574 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39575 const0_rtx);
39576 emit_insn (gen_rtx_SET (op2, pat));
39578 if (target == 0
39579 || !register_operand (target, SImode))
39580 target = gen_reg_rtx (SImode);
39582 emit_insn (gen_zero_extendqisi2 (target, op2));
39583 return target;
39585 case IX86_BUILTIN_SBB32:
39586 icode = CODE_FOR_subsi3_carry;
39587 mode0 = SImode;
39588 goto addcarryx;
39590 case IX86_BUILTIN_SBB64:
39591 icode = CODE_FOR_subdi3_carry;
39592 mode0 = DImode;
39593 goto addcarryx;
39595 case IX86_BUILTIN_ADDCARRYX32:
39596 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39597 mode0 = SImode;
39598 goto addcarryx;
39600 case IX86_BUILTIN_ADDCARRYX64:
39601 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39602 mode0 = DImode;
39604 addcarryx:
39605 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39606 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39607 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39608 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39610 op0 = gen_reg_rtx (QImode);
39612 /* Generate CF from input operand. */
39613 op1 = expand_normal (arg0);
39614 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39615 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
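/* Adding 0xff to the zero-extended carry-in sets CF exactly when the
   input was nonzero, recreating the incoming carry in the flags register
   for the add/sub-with-carry pattern emitted below.  */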
39617 /* Gen ADCX instruction to compute X+Y+CF. */
39618 op2 = expand_normal (arg1);
39619 op3 = expand_normal (arg2);
39621 if (!REG_P (op2))
39622 op2 = copy_to_mode_reg (mode0, op2);
39623 if (!REG_P (op3))
39624 op3 = copy_to_mode_reg (mode0, op3);
39626 op0 = gen_reg_rtx (mode0);
39628 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39629 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39630 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39632 /* Store the result. */
39633 op4 = expand_normal (arg3);
39634 if (!address_operand (op4, VOIDmode))
39636 op4 = convert_memory_address (Pmode, op4);
39637 op4 = copy_addr_to_reg (op4);
39639 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39641 /* Return current CF value. */
39642 if (target == 0)
39643 target = gen_reg_rtx (QImode);
39645 PUT_MODE (pat, QImode);
39646 emit_insn (gen_rtx_SET (target, pat));
39647 return target;
39649 case IX86_BUILTIN_READ_FLAGS:
39650 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39652 if (optimize
39653 || target == NULL_RTX
39654 || !nonimmediate_operand (target, word_mode)
39655 || GET_MODE (target) != word_mode)
39656 target = gen_reg_rtx (word_mode);
39658 emit_insn (gen_pop (target));
39659 return target;
39661 case IX86_BUILTIN_WRITE_FLAGS:
39663 arg0 = CALL_EXPR_ARG (exp, 0);
39664 op0 = expand_normal (arg0);
39665 if (!general_no_elim_operand (op0, word_mode))
39666 op0 = copy_to_mode_reg (word_mode, op0);
39668 emit_insn (gen_push (op0));
39669 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39670 return 0;
39672 case IX86_BUILTIN_KORTESTC16:
39673 icode = CODE_FOR_kortestchi;
39674 mode0 = HImode;
39675 mode1 = CCCmode;
39676 goto kortest;
39678 case IX86_BUILTIN_KORTESTZ16:
39679 icode = CODE_FOR_kortestzhi;
39680 mode0 = HImode;
39681 mode1 = CCZmode;
39683 kortest:
39684 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39685 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39686 op0 = expand_normal (arg0);
39687 op1 = expand_normal (arg1);
39689 op0 = copy_to_reg (op0);
39690 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39691 op1 = copy_to_reg (op1);
39692 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39694 target = gen_reg_rtx (QImode);
39695 emit_insn (gen_rtx_SET (target, const0_rtx));
39697 /* Emit kortest. */
39698 emit_insn (GEN_FCN (icode) (op0, op1));
39699 /* And use setcc to return result from flags. */
39700 ix86_expand_setcc (target, EQ,
39701 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39702 return target;
39704 case IX86_BUILTIN_GATHERSIV2DF:
39705 icode = CODE_FOR_avx2_gathersiv2df;
39706 goto gather_gen;
39707 case IX86_BUILTIN_GATHERSIV4DF:
39708 icode = CODE_FOR_avx2_gathersiv4df;
39709 goto gather_gen;
39710 case IX86_BUILTIN_GATHERDIV2DF:
39711 icode = CODE_FOR_avx2_gatherdiv2df;
39712 goto gather_gen;
39713 case IX86_BUILTIN_GATHERDIV4DF:
39714 icode = CODE_FOR_avx2_gatherdiv4df;
39715 goto gather_gen;
39716 case IX86_BUILTIN_GATHERSIV4SF:
39717 icode = CODE_FOR_avx2_gathersiv4sf;
39718 goto gather_gen;
39719 case IX86_BUILTIN_GATHERSIV8SF:
39720 icode = CODE_FOR_avx2_gathersiv8sf;
39721 goto gather_gen;
39722 case IX86_BUILTIN_GATHERDIV4SF:
39723 icode = CODE_FOR_avx2_gatherdiv4sf;
39724 goto gather_gen;
39725 case IX86_BUILTIN_GATHERDIV8SF:
39726 icode = CODE_FOR_avx2_gatherdiv8sf;
39727 goto gather_gen;
39728 case IX86_BUILTIN_GATHERSIV2DI:
39729 icode = CODE_FOR_avx2_gathersiv2di;
39730 goto gather_gen;
39731 case IX86_BUILTIN_GATHERSIV4DI:
39732 icode = CODE_FOR_avx2_gathersiv4di;
39733 goto gather_gen;
39734 case IX86_BUILTIN_GATHERDIV2DI:
39735 icode = CODE_FOR_avx2_gatherdiv2di;
39736 goto gather_gen;
39737 case IX86_BUILTIN_GATHERDIV4DI:
39738 icode = CODE_FOR_avx2_gatherdiv4di;
39739 goto gather_gen;
39740 case IX86_BUILTIN_GATHERSIV4SI:
39741 icode = CODE_FOR_avx2_gathersiv4si;
39742 goto gather_gen;
39743 case IX86_BUILTIN_GATHERSIV8SI:
39744 icode = CODE_FOR_avx2_gathersiv8si;
39745 goto gather_gen;
39746 case IX86_BUILTIN_GATHERDIV4SI:
39747 icode = CODE_FOR_avx2_gatherdiv4si;
39748 goto gather_gen;
39749 case IX86_BUILTIN_GATHERDIV8SI:
39750 icode = CODE_FOR_avx2_gatherdiv8si;
39751 goto gather_gen;
39752 case IX86_BUILTIN_GATHERALTSIV4DF:
39753 icode = CODE_FOR_avx2_gathersiv4df;
39754 goto gather_gen;
39755 case IX86_BUILTIN_GATHERALTDIV8SF:
39756 icode = CODE_FOR_avx2_gatherdiv8sf;
39757 goto gather_gen;
39758 case IX86_BUILTIN_GATHERALTSIV4DI:
39759 icode = CODE_FOR_avx2_gathersiv4di;
39760 goto gather_gen;
39761 case IX86_BUILTIN_GATHERALTDIV8SI:
39762 icode = CODE_FOR_avx2_gatherdiv8si;
39763 goto gather_gen;
39764 case IX86_BUILTIN_GATHER3SIV16SF:
39765 icode = CODE_FOR_avx512f_gathersiv16sf;
39766 goto gather_gen;
39767 case IX86_BUILTIN_GATHER3SIV8DF:
39768 icode = CODE_FOR_avx512f_gathersiv8df;
39769 goto gather_gen;
39770 case IX86_BUILTIN_GATHER3DIV16SF:
39771 icode = CODE_FOR_avx512f_gatherdiv16sf;
39772 goto gather_gen;
39773 case IX86_BUILTIN_GATHER3DIV8DF:
39774 icode = CODE_FOR_avx512f_gatherdiv8df;
39775 goto gather_gen;
39776 case IX86_BUILTIN_GATHER3SIV16SI:
39777 icode = CODE_FOR_avx512f_gathersiv16si;
39778 goto gather_gen;
39779 case IX86_BUILTIN_GATHER3SIV8DI:
39780 icode = CODE_FOR_avx512f_gathersiv8di;
39781 goto gather_gen;
39782 case IX86_BUILTIN_GATHER3DIV16SI:
39783 icode = CODE_FOR_avx512f_gatherdiv16si;
39784 goto gather_gen;
39785 case IX86_BUILTIN_GATHER3DIV8DI:
39786 icode = CODE_FOR_avx512f_gatherdiv8di;
39787 goto gather_gen;
39788 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39789 icode = CODE_FOR_avx512f_gathersiv8df;
39790 goto gather_gen;
39791 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39792 icode = CODE_FOR_avx512f_gatherdiv16sf;
39793 goto gather_gen;
39794 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39795 icode = CODE_FOR_avx512f_gathersiv8di;
39796 goto gather_gen;
39797 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39798 icode = CODE_FOR_avx512f_gatherdiv16si;
39799 goto gather_gen;
39800 case IX86_BUILTIN_GATHER3SIV2DF:
39801 icode = CODE_FOR_avx512vl_gathersiv2df;
39802 goto gather_gen;
39803 case IX86_BUILTIN_GATHER3SIV4DF:
39804 icode = CODE_FOR_avx512vl_gathersiv4df;
39805 goto gather_gen;
39806 case IX86_BUILTIN_GATHER3DIV2DF:
39807 icode = CODE_FOR_avx512vl_gatherdiv2df;
39808 goto gather_gen;
39809 case IX86_BUILTIN_GATHER3DIV4DF:
39810 icode = CODE_FOR_avx512vl_gatherdiv4df;
39811 goto gather_gen;
39812 case IX86_BUILTIN_GATHER3SIV4SF:
39813 icode = CODE_FOR_avx512vl_gathersiv4sf;
39814 goto gather_gen;
39815 case IX86_BUILTIN_GATHER3SIV8SF:
39816 icode = CODE_FOR_avx512vl_gathersiv8sf;
39817 goto gather_gen;
39818 case IX86_BUILTIN_GATHER3DIV4SF:
39819 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39820 goto gather_gen;
39821 case IX86_BUILTIN_GATHER3DIV8SF:
39822 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39823 goto gather_gen;
39824 case IX86_BUILTIN_GATHER3SIV2DI:
39825 icode = CODE_FOR_avx512vl_gathersiv2di;
39826 goto gather_gen;
39827 case IX86_BUILTIN_GATHER3SIV4DI:
39828 icode = CODE_FOR_avx512vl_gathersiv4di;
39829 goto gather_gen;
39830 case IX86_BUILTIN_GATHER3DIV2DI:
39831 icode = CODE_FOR_avx512vl_gatherdiv2di;
39832 goto gather_gen;
39833 case IX86_BUILTIN_GATHER3DIV4DI:
39834 icode = CODE_FOR_avx512vl_gatherdiv4di;
39835 goto gather_gen;
39836 case IX86_BUILTIN_GATHER3SIV4SI:
39837 icode = CODE_FOR_avx512vl_gathersiv4si;
39838 goto gather_gen;
39839 case IX86_BUILTIN_GATHER3SIV8SI:
39840 icode = CODE_FOR_avx512vl_gathersiv8si;
39841 goto gather_gen;
39842 case IX86_BUILTIN_GATHER3DIV4SI:
39843 icode = CODE_FOR_avx512vl_gatherdiv4si;
39844 goto gather_gen;
39845 case IX86_BUILTIN_GATHER3DIV8SI:
39846 icode = CODE_FOR_avx512vl_gatherdiv8si;
39847 goto gather_gen;
39848 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39849 icode = CODE_FOR_avx512vl_gathersiv4df;
39850 goto gather_gen;
39851 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39852 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39853 goto gather_gen;
39854 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39855 icode = CODE_FOR_avx512vl_gathersiv4di;
39856 goto gather_gen;
39857 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39858 icode = CODE_FOR_avx512vl_gatherdiv8si;
39859 goto gather_gen;
39860 case IX86_BUILTIN_SCATTERSIV16SF:
39861 icode = CODE_FOR_avx512f_scattersiv16sf;
39862 goto scatter_gen;
39863 case IX86_BUILTIN_SCATTERSIV8DF:
39864 icode = CODE_FOR_avx512f_scattersiv8df;
39865 goto scatter_gen;
39866 case IX86_BUILTIN_SCATTERDIV16SF:
39867 icode = CODE_FOR_avx512f_scatterdiv16sf;
39868 goto scatter_gen;
39869 case IX86_BUILTIN_SCATTERDIV8DF:
39870 icode = CODE_FOR_avx512f_scatterdiv8df;
39871 goto scatter_gen;
39872 case IX86_BUILTIN_SCATTERSIV16SI:
39873 icode = CODE_FOR_avx512f_scattersiv16si;
39874 goto scatter_gen;
39875 case IX86_BUILTIN_SCATTERSIV8DI:
39876 icode = CODE_FOR_avx512f_scattersiv8di;
39877 goto scatter_gen;
39878 case IX86_BUILTIN_SCATTERDIV16SI:
39879 icode = CODE_FOR_avx512f_scatterdiv16si;
39880 goto scatter_gen;
39881 case IX86_BUILTIN_SCATTERDIV8DI:
39882 icode = CODE_FOR_avx512f_scatterdiv8di;
39883 goto scatter_gen;
39884 case IX86_BUILTIN_SCATTERSIV8SF:
39885 icode = CODE_FOR_avx512vl_scattersiv8sf;
39886 goto scatter_gen;
39887 case IX86_BUILTIN_SCATTERSIV4SF:
39888 icode = CODE_FOR_avx512vl_scattersiv4sf;
39889 goto scatter_gen;
39890 case IX86_BUILTIN_SCATTERSIV4DF:
39891 icode = CODE_FOR_avx512vl_scattersiv4df;
39892 goto scatter_gen;
39893 case IX86_BUILTIN_SCATTERSIV2DF:
39894 icode = CODE_FOR_avx512vl_scattersiv2df;
39895 goto scatter_gen;
39896 case IX86_BUILTIN_SCATTERDIV8SF:
39897 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39898 goto scatter_gen;
39899 case IX86_BUILTIN_SCATTERDIV4SF:
39900 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39901 goto scatter_gen;
39902 case IX86_BUILTIN_SCATTERDIV4DF:
39903 icode = CODE_FOR_avx512vl_scatterdiv4df;
39904 goto scatter_gen;
39905 case IX86_BUILTIN_SCATTERDIV2DF:
39906 icode = CODE_FOR_avx512vl_scatterdiv2df;
39907 goto scatter_gen;
39908 case IX86_BUILTIN_SCATTERSIV8SI:
39909 icode = CODE_FOR_avx512vl_scattersiv8si;
39910 goto scatter_gen;
39911 case IX86_BUILTIN_SCATTERSIV4SI:
39912 icode = CODE_FOR_avx512vl_scattersiv4si;
39913 goto scatter_gen;
39914 case IX86_BUILTIN_SCATTERSIV4DI:
39915 icode = CODE_FOR_avx512vl_scattersiv4di;
39916 goto scatter_gen;
39917 case IX86_BUILTIN_SCATTERSIV2DI:
39918 icode = CODE_FOR_avx512vl_scattersiv2di;
39919 goto scatter_gen;
39920 case IX86_BUILTIN_SCATTERDIV8SI:
39921 icode = CODE_FOR_avx512vl_scatterdiv8si;
39922 goto scatter_gen;
39923 case IX86_BUILTIN_SCATTERDIV4SI:
39924 icode = CODE_FOR_avx512vl_scatterdiv4si;
39925 goto scatter_gen;
39926 case IX86_BUILTIN_SCATTERDIV4DI:
39927 icode = CODE_FOR_avx512vl_scatterdiv4di;
39928 goto scatter_gen;
39929 case IX86_BUILTIN_SCATTERDIV2DI:
39930 icode = CODE_FOR_avx512vl_scatterdiv2di;
39931 goto scatter_gen;
39932 case IX86_BUILTIN_GATHERPFDPD:
39933 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39934 goto vec_prefetch_gen;
39935 case IX86_BUILTIN_GATHERPFDPS:
39936 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39937 goto vec_prefetch_gen;
39938 case IX86_BUILTIN_GATHERPFQPD:
39939 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39940 goto vec_prefetch_gen;
39941 case IX86_BUILTIN_GATHERPFQPS:
39942 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39943 goto vec_prefetch_gen;
39944 case IX86_BUILTIN_SCATTERPFDPD:
39945 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39946 goto vec_prefetch_gen;
39947 case IX86_BUILTIN_SCATTERPFDPS:
39948 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39949 goto vec_prefetch_gen;
39950 case IX86_BUILTIN_SCATTERPFQPD:
39951 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39952 goto vec_prefetch_gen;
39953 case IX86_BUILTIN_SCATTERPFQPS:
39954 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39955 goto vec_prefetch_gen;
39957 gather_gen:
39958 rtx half;
39959 rtx (*gen) (rtx, rtx);
39961 arg0 = CALL_EXPR_ARG (exp, 0);
39962 arg1 = CALL_EXPR_ARG (exp, 1);
39963 arg2 = CALL_EXPR_ARG (exp, 2);
39964 arg3 = CALL_EXPR_ARG (exp, 3);
39965 arg4 = CALL_EXPR_ARG (exp, 4);
39966 op0 = expand_normal (arg0);
39967 op1 = expand_normal (arg1);
39968 op2 = expand_normal (arg2);
39969 op3 = expand_normal (arg3);
39970 op4 = expand_normal (arg4);
39971 /* Note the arg order is different from the operand order. */
39972 mode0 = insn_data[icode].operand[1].mode;
39973 mode2 = insn_data[icode].operand[3].mode;
39974 mode3 = insn_data[icode].operand[4].mode;
39975 mode4 = insn_data[icode].operand[5].mode;
39977 if (target == NULL_RTX
39978 || GET_MODE (target) != insn_data[icode].operand[0].mode
39979 || !insn_data[icode].operand[0].predicate (target,
39980 GET_MODE (target)))
39981 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39982 else
39983 subtarget = target;
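/* The *ALT* gather builtins use an index vector of the opposite element
   width, so only half of the wider operand is actually consumed: the low
   half of the index vector for the SIV forms, and the low halves of the
   source and mask vectors for the DIV forms.  Extract them here.  */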
39985 switch (fcode)
39987 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39988 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39989 half = gen_reg_rtx (V8SImode);
39990 if (!nonimmediate_operand (op2, V16SImode))
39991 op2 = copy_to_mode_reg (V16SImode, op2);
39992 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39993 op2 = half;
39994 break;
39995 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39996 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39997 case IX86_BUILTIN_GATHERALTSIV4DF:
39998 case IX86_BUILTIN_GATHERALTSIV4DI:
39999 half = gen_reg_rtx (V4SImode);
40000 if (!nonimmediate_operand (op2, V8SImode))
40001 op2 = copy_to_mode_reg (V8SImode, op2);
40002 emit_insn (gen_vec_extract_lo_v8si (half, op2));
40003 op2 = half;
40004 break;
40005 case IX86_BUILTIN_GATHER3ALTDIV16SF:
40006 case IX86_BUILTIN_GATHER3ALTDIV16SI:
40007 half = gen_reg_rtx (mode0);
40008 if (mode0 == V8SFmode)
40009 gen = gen_vec_extract_lo_v16sf;
40010 else
40011 gen = gen_vec_extract_lo_v16si;
40012 if (!nonimmediate_operand (op0, GET_MODE (op0)))
40013 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
40014 emit_insn (gen (half, op0));
40015 op0 = half;
40016 if (GET_MODE (op3) != VOIDmode)
40018 if (!nonimmediate_operand (op3, GET_MODE (op3)))
40019 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40020 emit_insn (gen (half, op3));
40021 op3 = half;
40023 break;
40024 case IX86_BUILTIN_GATHER3ALTDIV8SF:
40025 case IX86_BUILTIN_GATHER3ALTDIV8SI:
40026 case IX86_BUILTIN_GATHERALTDIV8SF:
40027 case IX86_BUILTIN_GATHERALTDIV8SI:
40028 half = gen_reg_rtx (mode0);
40029 if (mode0 == V4SFmode)
40030 gen = gen_vec_extract_lo_v8sf;
40031 else
40032 gen = gen_vec_extract_lo_v8si;
40033 if (!nonimmediate_operand (op0, GET_MODE (op0)))
40034 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
40035 emit_insn (gen (half, op0));
40036 op0 = half;
40037 if (GET_MODE (op3) != VOIDmode)
40039 if (!nonimmediate_operand (op3, GET_MODE (op3)))
40040 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40041 emit_insn (gen (half, op3));
40042 op3 = half;
40044 break;
40045 default:
40046 break;
40049 /* Force the memory operand to use only a base register here.  We
40050 don't want to do this for the memory operands of other builtin
40051 functions. */
40052 op1 = ix86_zero_extend_to_Pmode (op1);
40054 if (!insn_data[icode].operand[1].predicate (op0, mode0))
40055 op0 = copy_to_mode_reg (mode0, op0);
40056 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
40057 op1 = copy_to_mode_reg (Pmode, op1);
40058 if (!insn_data[icode].operand[3].predicate (op2, mode2))
40059 op2 = copy_to_mode_reg (mode2, op2);
40061 op3 = fixup_modeless_constant (op3, mode3);
40063 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
40065 if (!insn_data[icode].operand[4].predicate (op3, mode3))
40066 op3 = copy_to_mode_reg (mode3, op3);
40068 else
40070 op3 = copy_to_reg (op3);
40071 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
40073 if (!insn_data[icode].operand[5].predicate (op4, mode4))
40075 error ("the last argument must be scale 1, 2, 4, 8");
40076 return const0_rtx;
40079 /* Optimize. If the mask is known to have all high bits set,
40080 replace op0 with pc_rtx to signal that the instruction
40081 overwrites the whole destination and doesn't use its
40082 previous contents. */
40083 if (optimize)
40085 if (TREE_CODE (arg3) == INTEGER_CST)
40087 if (integer_all_onesp (arg3))
40088 op0 = pc_rtx;
40090 else if (TREE_CODE (arg3) == VECTOR_CST)
40092 unsigned int negative = 0;
40093 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
40095 tree cst = VECTOR_CST_ELT (arg3, i);
40096 if (TREE_CODE (cst) == INTEGER_CST
40097 && tree_int_cst_sign_bit (cst))
40098 negative++;
40099 else if (TREE_CODE (cst) == REAL_CST
40100 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
40101 negative++;
40103 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
40104 op0 = pc_rtx;
40106 else if (TREE_CODE (arg3) == SSA_NAME
40107 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
40109 /* Recognize also when mask is like:
40110 __v2df src = _mm_setzero_pd ();
40111 __v2df mask = _mm_cmpeq_pd (src, src);
40113 __v8sf src = _mm256_setzero_ps ();
40114 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
40115 as that is a cheaper way to load all ones into
40116 a register than having to load a constant from
40117 memory. */
40118 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
40119 if (is_gimple_call (def_stmt))
40121 tree fndecl = gimple_call_fndecl (def_stmt);
40122 if (fndecl
40123 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
40124 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
40126 case IX86_BUILTIN_CMPPD:
40127 case IX86_BUILTIN_CMPPS:
40128 case IX86_BUILTIN_CMPPD256:
40129 case IX86_BUILTIN_CMPPS256:
40130 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
40131 break;
40132 /* FALLTHRU */
40133 case IX86_BUILTIN_CMPEQPD:
40134 case IX86_BUILTIN_CMPEQPS:
40135 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
40136 && initializer_zerop (gimple_call_arg (def_stmt,
40137 1)))
40138 op0 = pc_rtx;
40139 break;
40140 default:
40141 break;
40147 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
40148 if (! pat)
40149 return const0_rtx;
40150 emit_insn (pat);
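/* Some of the "DIV" gathers produce a result wider than the builtin's
   return type; for those, extract the low half of SUBTARGET into TARGET,
   otherwise return SUBTARGET directly.  */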
40152 switch (fcode)
40154 case IX86_BUILTIN_GATHER3DIV16SF:
40155 if (target == NULL_RTX)
40156 target = gen_reg_rtx (V8SFmode);
40157 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
40158 break;
40159 case IX86_BUILTIN_GATHER3DIV16SI:
40160 if (target == NULL_RTX)
40161 target = gen_reg_rtx (V8SImode);
40162 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
40163 break;
40164 case IX86_BUILTIN_GATHER3DIV8SF:
40165 case IX86_BUILTIN_GATHERDIV8SF:
40166 if (target == NULL_RTX)
40167 target = gen_reg_rtx (V4SFmode);
40168 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40169 break;
40170 case IX86_BUILTIN_GATHER3DIV8SI:
40171 case IX86_BUILTIN_GATHERDIV8SI:
40172 if (target == NULL_RTX)
40173 target = gen_reg_rtx (V4SImode);
40174 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40175 break;
40176 default:
40177 target = subtarget;
40178 break;
40180 return target;
40182 scatter_gen:
40183 arg0 = CALL_EXPR_ARG (exp, 0);
40184 arg1 = CALL_EXPR_ARG (exp, 1);
40185 arg2 = CALL_EXPR_ARG (exp, 2);
40186 arg3 = CALL_EXPR_ARG (exp, 3);
40187 arg4 = CALL_EXPR_ARG (exp, 4);
40188 op0 = expand_normal (arg0);
40189 op1 = expand_normal (arg1);
40190 op2 = expand_normal (arg2);
40191 op3 = expand_normal (arg3);
40192 op4 = expand_normal (arg4);
40193 mode1 = insn_data[icode].operand[1].mode;
40194 mode2 = insn_data[icode].operand[2].mode;
40195 mode3 = insn_data[icode].operand[3].mode;
40196 mode4 = insn_data[icode].operand[4].mode;
40198 /* Force the memory operand to use only a base register here. We
40199 don't want to do this for the memory operands of other builtin
40200 functions. */
40201 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40203 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40204 op0 = copy_to_mode_reg (Pmode, op0);
40206 op1 = fixup_modeless_constant (op1, mode1);
40208 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40210 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40211 op1 = copy_to_mode_reg (mode1, op1);
40213 else
40215 op1 = copy_to_reg (op1);
40216 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40219 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40220 op2 = copy_to_mode_reg (mode2, op2);
40222 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40223 op3 = copy_to_mode_reg (mode3, op3);
40225 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40227 error ("the last argument must be scale 1, 2, 4, 8");
40228 return const0_rtx;
40231 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40232 if (! pat)
40233 return const0_rtx;
40235 emit_insn (pat);
40236 return 0;
40238 vec_prefetch_gen:
40239 arg0 = CALL_EXPR_ARG (exp, 0);
40240 arg1 = CALL_EXPR_ARG (exp, 1);
40241 arg2 = CALL_EXPR_ARG (exp, 2);
40242 arg3 = CALL_EXPR_ARG (exp, 3);
40243 arg4 = CALL_EXPR_ARG (exp, 4);
40244 op0 = expand_normal (arg0);
40245 op1 = expand_normal (arg1);
40246 op2 = expand_normal (arg2);
40247 op3 = expand_normal (arg3);
40248 op4 = expand_normal (arg4);
40249 mode0 = insn_data[icode].operand[0].mode;
40250 mode1 = insn_data[icode].operand[1].mode;
40251 mode3 = insn_data[icode].operand[3].mode;
40252 mode4 = insn_data[icode].operand[4].mode;
40254 op0 = fixup_modeless_constant (op0, mode0);
40256 if (GET_MODE (op0) == mode0
40257 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40259 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40260 op0 = copy_to_mode_reg (mode0, op0);
40262 else if (op0 != constm1_rtx)
40264 op0 = copy_to_reg (op0);
40265 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40268 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40269 op1 = copy_to_mode_reg (mode1, op1);
40271 /* Force the memory operand to use only a base register here. We
40272 don't want to do this for the memory operands of other builtin
40273 functions. */
40274 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40276 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40277 op2 = copy_to_mode_reg (Pmode, op2);
40279 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40281 error ("the fourth argument must be scale 1, 2, 4, 8");
40282 return const0_rtx;
40285 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40287 error ("incorrect hint operand");
40288 return const0_rtx;
40291 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40292 if (! pat)
40293 return const0_rtx;
40295 emit_insn (pat);
40297 return 0;
40299 case IX86_BUILTIN_XABORT:
40300 icode = CODE_FOR_xabort;
40301 arg0 = CALL_EXPR_ARG (exp, 0);
40302 op0 = expand_normal (arg0);
40303 mode0 = insn_data[icode].operand[0].mode;
40304 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40306 error ("the xabort's argument must be an 8-bit immediate");
40307 return const0_rtx;
40309 emit_insn (gen_xabort (op0));
40310 return 0;
40312 default:
40313 break;
40316 for (i = 0, d = bdesc_special_args;
40317 i < ARRAY_SIZE (bdesc_special_args);
40318 i++, d++)
40319 if (d->code == fcode)
40320 return ix86_expand_special_args_builtin (d, exp, target);
40322 for (i = 0, d = bdesc_args;
40323 i < ARRAY_SIZE (bdesc_args);
40324 i++, d++)
40325 if (d->code == fcode)
40326 switch (fcode)
40328 case IX86_BUILTIN_FABSQ:
40329 case IX86_BUILTIN_COPYSIGNQ:
40330 if (!TARGET_SSE)
40331 /* Emit a normal call if SSE isn't available. */
40332 return expand_call (exp, target, ignore);
40333 default:
40334 return ix86_expand_args_builtin (d, exp, target);
40337 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40338 if (d->code == fcode)
40339 return ix86_expand_sse_comi (d, exp, target);
40341 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40342 if (d->code == fcode)
40343 return ix86_expand_round_builtin (d, exp, target);
40345 for (i = 0, d = bdesc_pcmpestr;
40346 i < ARRAY_SIZE (bdesc_pcmpestr);
40347 i++, d++)
40348 if (d->code == fcode)
40349 return ix86_expand_sse_pcmpestr (d, exp, target);
40351 for (i = 0, d = bdesc_pcmpistr;
40352 i < ARRAY_SIZE (bdesc_pcmpistr);
40353 i++, d++)
40354 if (d->code == fcode)
40355 return ix86_expand_sse_pcmpistr (d, exp, target);
40357 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40358 if (d->code == fcode)
40359 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40360 (enum ix86_builtin_func_type)
40361 d->flag, d->comparison);
40363 gcc_unreachable ();
40366 /* This returns the target-specific builtin with code CODE if
40367 current_function_decl has visibility on this builtin, which is checked
40368 using isa flags. Returns NULL_TREE otherwise. */
40370 static tree ix86_get_builtin (enum ix86_builtins code)
40372 struct cl_target_option *opts;
40373 tree target_tree = NULL_TREE;
40375 /* Determine the isa flags of current_function_decl. */
40377 if (current_function_decl)
40378 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40380 if (target_tree == NULL)
40381 target_tree = target_option_default_node;
40383 opts = TREE_TARGET_OPTION (target_tree);
40385 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40386 return ix86_builtin_decl (code, true);
40387 else
40388 return NULL_TREE;
40391 /* Return the function decl for the target-specific builtin
40392 corresponding to the MPX builtin passed in FCODE. */
40393 static tree
40394 ix86_builtin_mpx_function (unsigned fcode)
40396 switch (fcode)
40398 case BUILT_IN_CHKP_BNDMK:
40399 return ix86_builtins[IX86_BUILTIN_BNDMK];
40401 case BUILT_IN_CHKP_BNDSTX:
40402 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40404 case BUILT_IN_CHKP_BNDLDX:
40405 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40407 case BUILT_IN_CHKP_BNDCL:
40408 return ix86_builtins[IX86_BUILTIN_BNDCL];
40410 case BUILT_IN_CHKP_BNDCU:
40411 return ix86_builtins[IX86_BUILTIN_BNDCU];
40413 case BUILT_IN_CHKP_BNDRET:
40414 return ix86_builtins[IX86_BUILTIN_BNDRET];
40416 case BUILT_IN_CHKP_INTERSECT:
40417 return ix86_builtins[IX86_BUILTIN_BNDINT];
40419 case BUILT_IN_CHKP_NARROW:
40420 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40422 case BUILT_IN_CHKP_SIZEOF:
40423 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40425 case BUILT_IN_CHKP_EXTRACT_LOWER:
40426 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40428 case BUILT_IN_CHKP_EXTRACT_UPPER:
40429 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40431 default:
40432 return NULL_TREE;
40435 gcc_unreachable ();
40438 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40440 Return an address to be used to load/store bounds for pointer
40441 passed in SLOT.
40443 SLOT_NO is an integer constant holding number of a target
40444 dependent special slot to be used in case SLOT is not a memory.
40446 SPECIAL_BASE is a pointer to be used as a base of fake address
40447 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40448 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
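/* For illustration, with 64-bit pointers slot 0 maps to the fake address
   SPECIAL_BASE - 8, slot 1 to SPECIAL_BASE - 16, and so on.  */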
40450 static rtx
40451 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40453 rtx addr = NULL;
40455 /* A NULL slot means we pass bounds for a pointer not passed to the
40456 function at all. A register slot means we pass the pointer in a
40457 register. In both these cases bounds are passed via the Bounds
40458 Table. Since we do not have the actual pointer stored in memory,
40459 we have to use fake addresses to access the Bounds Table. We
40460 start with (special_base - sizeof (void *)) and decrease this
40461 address by the pointer size to get addresses for other slots. */
40462 if (!slot || REG_P (slot))
40464 gcc_assert (CONST_INT_P (slot_no));
40465 addr = plus_constant (Pmode, special_base,
40466 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40468 /* If the pointer is passed in memory, then its address is used to
40469 access the Bounds Table. */
40470 else if (MEM_P (slot))
40472 addr = XEXP (slot, 0);
40473 if (!register_operand (addr, Pmode))
40474 addr = copy_addr_to_reg (addr);
40476 else
40477 gcc_unreachable ();
40479 return addr;
40482 /* Expand pass uses this hook to load bounds for function parameter
40483 PTR passed in SLOT in case its bounds are not passed in a register.
40485 If SLOT is a memory, then bounds are loaded as for regular pointer
40486 loaded from memory. PTR may be NULL in case SLOT is a memory.
40487 In such case value of PTR (if required) may be loaded from SLOT.
40489 If SLOT is NULL or a register then SLOT_NO is an integer constant
40490 holding number of the target dependent special slot which should be
40491 used to obtain bounds.
40493 Return loaded bounds. */
40495 static rtx
40496 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40498 rtx reg = gen_reg_rtx (BNDmode);
40499 rtx addr;
40501 /* Get address to be used to access Bounds Table. Special slots start
40502 at the location of return address of the current function. */
40503 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40505 /* Load the pointer value from memory if we don't have it. */
40506 if (!ptr)
40508 gcc_assert (MEM_P (slot));
40509 ptr = copy_addr_to_reg (slot);
40512 if (!register_operand (ptr, Pmode))
40513 ptr = ix86_zero_extend_to_Pmode (ptr);
40515 emit_insn (BNDmode == BND64mode
40516 ? gen_bnd64_ldx (reg, addr, ptr)
40517 : gen_bnd32_ldx (reg, addr, ptr));
40519 return reg;
40522 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40523 passed in SLOT in case BOUNDS are not passed in a register.
40525 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40526 stored in memory. PTR may be NULL in case SLOT is a memory.
40527 In such case value of PTR (if required) may be loaded from SLOT.
40529 If SLOT is NULL or a register then SLOT_NO is an integer constant
40530 holding number of the target dependent special slot which should be
40531 used to store BOUNDS. */
40533 static void
40534 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40536 rtx addr;
40538 /* Get address to be used to access Bounds Table. Special slots start
40539 at the location of return address of a called function. */
40540 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40543 /* Load the pointer value from memory if we don't have it. */
40543 if (!ptr)
40545 gcc_assert (MEM_P (slot));
40546 ptr = copy_addr_to_reg (slot);
40549 if (!register_operand (ptr, Pmode))
40550 ptr = ix86_zero_extend_to_Pmode (ptr);
40552 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40553 if (!register_operand (bounds, BNDmode))
40554 bounds = copy_to_mode_reg (BNDmode, bounds);
40556 emit_insn (BNDmode == BND64mode
40557 ? gen_bnd64_stx (addr, ptr, bounds)
40558 : gen_bnd32_stx (addr, ptr, bounds));
40561 /* Load and return bounds returned by function in SLOT. */
40563 static rtx
40564 ix86_load_returned_bounds (rtx slot)
40566 rtx res;
40568 gcc_assert (REG_P (slot));
40569 res = gen_reg_rtx (BNDmode);
40570 emit_move_insn (res, slot);
40572 return res;
40575 /* Store BOUNDS returned by function into SLOT. */
40577 static void
40578 ix86_store_returned_bounds (rtx slot, rtx bounds)
40580 gcc_assert (REG_P (slot));
40581 emit_move_insn (slot, bounds);
40584 /* Returns a function decl for a vectorized version of the builtin function
40585 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40586 if it is not available. */
40588 static tree
40589 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40590 tree type_in)
40592 machine_mode in_mode, out_mode;
40593 int in_n, out_n;
40594 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40596 if (TREE_CODE (type_out) != VECTOR_TYPE
40597 || TREE_CODE (type_in) != VECTOR_TYPE
40598 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40599 return NULL_TREE;
40601 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40602 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40603 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40604 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40606 switch (fn)
40608 case BUILT_IN_SQRT:
40609 if (out_mode == DFmode && in_mode == DFmode)
40611 if (out_n == 2 && in_n == 2)
40612 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40613 else if (out_n == 4 && in_n == 4)
40614 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40615 else if (out_n == 8 && in_n == 8)
40616 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40618 break;
40620 case BUILT_IN_EXP2F:
40621 if (out_mode == SFmode && in_mode == SFmode)
40623 if (out_n == 16 && in_n == 16)
40624 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40626 break;
40628 case BUILT_IN_SQRTF:
40629 if (out_mode == SFmode && in_mode == SFmode)
40631 if (out_n == 4 && in_n == 4)
40632 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40633 else if (out_n == 8 && in_n == 8)
40634 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40635 else if (out_n == 16 && in_n == 16)
40636 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40638 break;
40640 case BUILT_IN_IFLOOR:
40641 case BUILT_IN_LFLOOR:
40642 case BUILT_IN_LLFLOOR:
40643 /* The round insn does not trap on denormals. */
40644 if (flag_trapping_math || !TARGET_ROUND)
40645 break;
40647 if (out_mode == SImode && in_mode == DFmode)
40649 if (out_n == 4 && in_n == 2)
40650 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40651 else if (out_n == 8 && in_n == 4)
40652 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40653 else if (out_n == 16 && in_n == 8)
40654 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40656 break;
40658 case BUILT_IN_IFLOORF:
40659 case BUILT_IN_LFLOORF:
40660 case BUILT_IN_LLFLOORF:
40661 /* The round insn does not trap on denormals. */
40662 if (flag_trapping_math || !TARGET_ROUND)
40663 break;
40665 if (out_mode == SImode && in_mode == SFmode)
40667 if (out_n == 4 && in_n == 4)
40668 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40669 else if (out_n == 8 && in_n == 8)
40670 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40672 break;
40674 case BUILT_IN_ICEIL:
40675 case BUILT_IN_LCEIL:
40676 case BUILT_IN_LLCEIL:
40677 /* The round insn does not trap on denormals. */
40678 if (flag_trapping_math || !TARGET_ROUND)
40679 break;
40681 if (out_mode == SImode && in_mode == DFmode)
40683 if (out_n == 4 && in_n == 2)
40684 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40685 else if (out_n == 8 && in_n == 4)
40686 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40687 else if (out_n == 16 && in_n == 8)
40688 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40690 break;
40692 case BUILT_IN_ICEILF:
40693 case BUILT_IN_LCEILF:
40694 case BUILT_IN_LLCEILF:
40695 /* The round insn does not trap on denormals. */
40696 if (flag_trapping_math || !TARGET_ROUND)
40697 break;
40699 if (out_mode == SImode && in_mode == SFmode)
40701 if (out_n == 4 && in_n == 4)
40702 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40703 else if (out_n == 8 && in_n == 8)
40704 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40706 break;
40708 case BUILT_IN_IRINT:
40709 case BUILT_IN_LRINT:
40710 case BUILT_IN_LLRINT:
40711 if (out_mode == SImode && in_mode == DFmode)
40713 if (out_n == 4 && in_n == 2)
40714 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40715 else if (out_n == 8 && in_n == 4)
40716 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40718 break;
40720 case BUILT_IN_IRINTF:
40721 case BUILT_IN_LRINTF:
40722 case BUILT_IN_LLRINTF:
40723 if (out_mode == SImode && in_mode == SFmode)
40725 if (out_n == 4 && in_n == 4)
40726 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40727 else if (out_n == 8 && in_n == 8)
40728 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40730 break;
40732 case BUILT_IN_IROUND:
40733 case BUILT_IN_LROUND:
40734 case BUILT_IN_LLROUND:
40735 /* The round insn does not trap on denormals. */
40736 if (flag_trapping_math || !TARGET_ROUND)
40737 break;
40739 if (out_mode == SImode && in_mode == DFmode)
40741 if (out_n == 4 && in_n == 2)
40742 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40743 else if (out_n == 8 && in_n == 4)
40744 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40745 else if (out_n == 16 && in_n == 8)
40746 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40748 break;
40750 case BUILT_IN_IROUNDF:
40751 case BUILT_IN_LROUNDF:
40752 case BUILT_IN_LLROUNDF:
40753 /* The round insn does not trap on denormals. */
40754 if (flag_trapping_math || !TARGET_ROUND)
40755 break;
40757 if (out_mode == SImode && in_mode == SFmode)
40759 if (out_n == 4 && in_n == 4)
40760 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40761 else if (out_n == 8 && in_n == 8)
40762 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40764 break;
40766 case BUILT_IN_COPYSIGN:
40767 if (out_mode == DFmode && in_mode == DFmode)
40769 if (out_n == 2 && in_n == 2)
40770 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40771 else if (out_n == 4 && in_n == 4)
40772 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40773 else if (out_n == 8 && in_n == 8)
40774 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40776 break;
40778 case BUILT_IN_COPYSIGNF:
40779 if (out_mode == SFmode && in_mode == SFmode)
40781 if (out_n == 4 && in_n == 4)
40782 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40783 else if (out_n == 8 && in_n == 8)
40784 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40785 else if (out_n == 16 && in_n == 16)
40786 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40788 break;
40790 case BUILT_IN_FLOOR:
40791 /* The round insn does not trap on denormals. */
40792 if (flag_trapping_math || !TARGET_ROUND)
40793 break;
40795 if (out_mode == DFmode && in_mode == DFmode)
40797 if (out_n == 2 && in_n == 2)
40798 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40799 else if (out_n == 4 && in_n == 4)
40800 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40802 break;
40804 case BUILT_IN_FLOORF:
40805 /* The round insn does not trap on denormals. */
40806 if (flag_trapping_math || !TARGET_ROUND)
40807 break;
40809 if (out_mode == SFmode && in_mode == SFmode)
40811 if (out_n == 4 && in_n == 4)
40812 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40813 else if (out_n == 8 && in_n == 8)
40814 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40816 break;
40818 case BUILT_IN_CEIL:
40819 /* The round insn does not trap on denormals. */
40820 if (flag_trapping_math || !TARGET_ROUND)
40821 break;
40823 if (out_mode == DFmode && in_mode == DFmode)
40825 if (out_n == 2 && in_n == 2)
40826 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40827 else if (out_n == 4 && in_n == 4)
40828 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40830 break;
40832 case BUILT_IN_CEILF:
40833 /* The round insn does not trap on denormals. */
40834 if (flag_trapping_math || !TARGET_ROUND)
40835 break;
40837 if (out_mode == SFmode && in_mode == SFmode)
40839 if (out_n == 4 && in_n == 4)
40840 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40841 else if (out_n == 8 && in_n == 8)
40842 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40844 break;
40846 case BUILT_IN_TRUNC:
40847 /* The round insn does not trap on denormals. */
40848 if (flag_trapping_math || !TARGET_ROUND)
40849 break;
40851 if (out_mode == DFmode && in_mode == DFmode)
40853 if (out_n == 2 && in_n == 2)
40854 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40855 else if (out_n == 4 && in_n == 4)
40856 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40858 break;
40860 case BUILT_IN_TRUNCF:
40861 /* The round insn does not trap on denormals. */
40862 if (flag_trapping_math || !TARGET_ROUND)
40863 break;
40865 if (out_mode == SFmode && in_mode == SFmode)
40867 if (out_n == 4 && in_n == 4)
40868 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40869 else if (out_n == 8 && in_n == 8)
40870 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40872 break;
40874 case BUILT_IN_RINT:
40875 /* The round insn does not trap on denormals. */
40876 if (flag_trapping_math || !TARGET_ROUND)
40877 break;
40879 if (out_mode == DFmode && in_mode == DFmode)
40881 if (out_n == 2 && in_n == 2)
40882 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40883 else if (out_n == 4 && in_n == 4)
40884 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40886 break;
40888 case BUILT_IN_RINTF:
40889 /* The round insn does not trap on denormals. */
40890 if (flag_trapping_math || !TARGET_ROUND)
40891 break;
40893 if (out_mode == SFmode && in_mode == SFmode)
40895 if (out_n == 4 && in_n == 4)
40896 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40897 else if (out_n == 8 && in_n == 8)
40898 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40900 break;
40902 case BUILT_IN_ROUND:
40903 /* The round insn does not trap on denormals. */
40904 if (flag_trapping_math || !TARGET_ROUND)
40905 break;
40907 if (out_mode == DFmode && in_mode == DFmode)
40909 if (out_n == 2 && in_n == 2)
40910 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40911 else if (out_n == 4 && in_n == 4)
40912 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40914 break;
40916 case BUILT_IN_ROUNDF:
40917 /* The round insn does not trap on denormals. */
40918 if (flag_trapping_math || !TARGET_ROUND)
40919 break;
40921 if (out_mode == SFmode && in_mode == SFmode)
40923 if (out_n == 4 && in_n == 4)
40924 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40925 else if (out_n == 8 && in_n == 8)
40926 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40928 break;
40930 case BUILT_IN_FMA:
40931 if (out_mode == DFmode && in_mode == DFmode)
40933 if (out_n == 2 && in_n == 2)
40934 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40935 if (out_n == 4 && in_n == 4)
40936 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40938 break;
40940 case BUILT_IN_FMAF:
40941 if (out_mode == SFmode && in_mode == SFmode)
40943 if (out_n == 4 && in_n == 4)
40944 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40945 if (out_n == 8 && in_n == 8)
40946 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40948 break;
40950 default:
40951 break;
40954 /* Dispatch to a handler for a vectorization library. */
40955 if (ix86_veclib_handler)
40956 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40957 type_in);
40959 return NULL_TREE;
40962 /* Handler for an SVML-style interface to
40963 a library with vectorized intrinsics. */
40965 static tree
40966 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40968 char name[20];
40969 tree fntype, new_fndecl, args;
40970 unsigned arity;
40971 const char *bname;
40972 machine_mode el_mode, in_mode;
40973 int n, in_n;
40975 /* SVML is suitable for unsafe math only. */
40976 if (!flag_unsafe_math_optimizations)
40977 return NULL_TREE;
40979 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40980 n = TYPE_VECTOR_SUBPARTS (type_out);
40981 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40982 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40983 if (el_mode != in_mode
40984 || n != in_n)
40985 return NULL_TREE;
40987 switch (fn)
40989 case BUILT_IN_EXP:
40990 case BUILT_IN_LOG:
40991 case BUILT_IN_LOG10:
40992 case BUILT_IN_POW:
40993 case BUILT_IN_TANH:
40994 case BUILT_IN_TAN:
40995 case BUILT_IN_ATAN:
40996 case BUILT_IN_ATAN2:
40997 case BUILT_IN_ATANH:
40998 case BUILT_IN_CBRT:
40999 case BUILT_IN_SINH:
41000 case BUILT_IN_SIN:
41001 case BUILT_IN_ASINH:
41002 case BUILT_IN_ASIN:
41003 case BUILT_IN_COSH:
41004 case BUILT_IN_COS:
41005 case BUILT_IN_ACOSH:
41006 case BUILT_IN_ACOS:
41007 if (el_mode != DFmode || n != 2)
41008 return NULL_TREE;
41009 break;
41011 case BUILT_IN_EXPF:
41012 case BUILT_IN_LOGF:
41013 case BUILT_IN_LOG10F:
41014 case BUILT_IN_POWF:
41015 case BUILT_IN_TANHF:
41016 case BUILT_IN_TANF:
41017 case BUILT_IN_ATANF:
41018 case BUILT_IN_ATAN2F:
41019 case BUILT_IN_ATANHF:
41020 case BUILT_IN_CBRTF:
41021 case BUILT_IN_SINHF:
41022 case BUILT_IN_SINF:
41023 case BUILT_IN_ASINHF:
41024 case BUILT_IN_ASINF:
41025 case BUILT_IN_COSHF:
41026 case BUILT_IN_COSF:
41027 case BUILT_IN_ACOSHF:
41028 case BUILT_IN_ACOSF:
41029 if (el_mode != SFmode || n != 4)
41030 return NULL_TREE;
41031 break;
41033 default:
41034 return NULL_TREE;
41037 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41039 if (fn == BUILT_IN_LOGF)
41040 strcpy (name, "vmlsLn4");
41041 else if (fn == BUILT_IN_LOG)
41042 strcpy (name, "vmldLn2");
41043 else if (n == 4)
41045 sprintf (name, "vmls%s", bname+10);
41046 name[strlen (name)-1] = '4';
41048 else
41049 sprintf (name, "vmld%s2", bname+10);
41051 /* Convert to uppercase. */
41052 name[4] &= ~0x20;
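/* For example, with the standard builtin names this yields "vmlsSin4"
   for sinf and "vmldSin2" for sin after the case fixup above.  */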
41054 arity = 0;
41055 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41056 args;
41057 args = TREE_CHAIN (args))
41058 arity++;
41060 if (arity == 1)
41061 fntype = build_function_type_list (type_out, type_in, NULL);
41062 else
41063 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41065 /* Build a function declaration for the vectorized function. */
41066 new_fndecl = build_decl (BUILTINS_LOCATION,
41067 FUNCTION_DECL, get_identifier (name), fntype);
41068 TREE_PUBLIC (new_fndecl) = 1;
41069 DECL_EXTERNAL (new_fndecl) = 1;
41070 DECL_IS_NOVOPS (new_fndecl) = 1;
41071 TREE_READONLY (new_fndecl) = 1;
41073 return new_fndecl;
41076 /* Handler for an ACML-style interface to
41077 a library with vectorized intrinsics. */
41079 static tree
41080 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
41082 char name[20] = "__vr.._";
41083 tree fntype, new_fndecl, args;
41084 unsigned arity;
41085 const char *bname;
41086 machine_mode el_mode, in_mode;
41087 int n, in_n;
41089 /* ACML is 64-bit only and suitable for unsafe math only, as
41090 it does not correctly support parts of IEEE arithmetic with the
41091 required precision, such as denormals. */
41092 if (!TARGET_64BIT
41093 || !flag_unsafe_math_optimizations)
41094 return NULL_TREE;
41096 el_mode = TYPE_MODE (TREE_TYPE (type_out));
41097 n = TYPE_VECTOR_SUBPARTS (type_out);
41098 in_mode = TYPE_MODE (TREE_TYPE (type_in));
41099 in_n = TYPE_VECTOR_SUBPARTS (type_in);
41100 if (el_mode != in_mode
41101 || n != in_n)
41102 return NULL_TREE;
41104 switch (fn)
41106 case BUILT_IN_SIN:
41107 case BUILT_IN_COS:
41108 case BUILT_IN_EXP:
41109 case BUILT_IN_LOG:
41110 case BUILT_IN_LOG2:
41111 case BUILT_IN_LOG10:
41112 name[4] = 'd';
41113 name[5] = '2';
41114 if (el_mode != DFmode
41115 || n != 2)
41116 return NULL_TREE;
41117 break;
41119 case BUILT_IN_SINF:
41120 case BUILT_IN_COSF:
41121 case BUILT_IN_EXPF:
41122 case BUILT_IN_POWF:
41123 case BUILT_IN_LOGF:
41124 case BUILT_IN_LOG2F:
41125 case BUILT_IN_LOG10F:
41126 name[4] = 's';
41127 name[5] = '4';
41128 if (el_mode != SFmode
41129 || n != 4)
41130 return NULL_TREE;
41131 break;
41133 default:
41134 return NULL_TREE;
41137 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41138 sprintf (name + 7, "%s", bname+10);
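/* For example, with the standard builtin names sin becomes "__vrd2_sin"
   and sinf becomes "__vrs4_sinf".  */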
41140 arity = 0;
41141 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41142 args;
41143 args = TREE_CHAIN (args))
41144 arity++;
41146 if (arity == 1)
41147 fntype = build_function_type_list (type_out, type_in, NULL);
41148 else
41149 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41151 /* Build a function declaration for the vectorized function. */
41152 new_fndecl = build_decl (BUILTINS_LOCATION,
41153 FUNCTION_DECL, get_identifier (name), fntype);
41154 TREE_PUBLIC (new_fndecl) = 1;
41155 DECL_EXTERNAL (new_fndecl) = 1;
41156 DECL_IS_NOVOPS (new_fndecl) = 1;
41157 TREE_READONLY (new_fndecl) = 1;
41159 return new_fndecl;
41162 /* Returns a decl of a function that implements gather load with
41163 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
41164 Return NULL_TREE if it is not available. */
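/* For example, a V2DF gather with an SImode index maps to
   IX86_BUILTIN_GATHERSIV2DF (or the GATHER3 variant when AVX512VL is
   enabled), provided SCALE is 1, 2, 4 or 8.  */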
41166 static tree
41167 ix86_vectorize_builtin_gather (const_tree mem_vectype,
41168 const_tree index_type, int scale)
41170 bool si;
41171 enum ix86_builtins code;
41173 if (! TARGET_AVX2)
41174 return NULL_TREE;
41176 if ((TREE_CODE (index_type) != INTEGER_TYPE
41177 && !POINTER_TYPE_P (index_type))
41178 || (TYPE_MODE (index_type) != SImode
41179 && TYPE_MODE (index_type) != DImode))
41180 return NULL_TREE;
41182 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41183 return NULL_TREE;
41185 /* The v*gather* insns sign-extend the index to pointer mode. */
41186 if (TYPE_PRECISION (index_type) < POINTER_SIZE
41187 && TYPE_UNSIGNED (index_type))
41188 return NULL_TREE;
41190 if (scale <= 0
41191 || scale > 8
41192 || (scale & (scale - 1)) != 0)
41193 return NULL_TREE;
41195 si = TYPE_MODE (index_type) == SImode;
41196 switch (TYPE_MODE (mem_vectype))
41198 case V2DFmode:
41199 if (TARGET_AVX512VL)
41200 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41201 else
41202 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41203 break;
41204 case V4DFmode:
41205 if (TARGET_AVX512VL)
41206 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41207 else
41208 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41209 break;
41210 case V2DImode:
41211 if (TARGET_AVX512VL)
41212 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41213 else
41214 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41215 break;
41216 case V4DImode:
41217 if (TARGET_AVX512VL)
41218 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41219 else
41220 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41221 break;
41222 case V4SFmode:
41223 if (TARGET_AVX512VL)
41224 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41225 else
41226 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41227 break;
41228 case V8SFmode:
41229 if (TARGET_AVX512VL)
41230 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41231 else
41232 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41233 break;
41234 case V4SImode:
41235 if (TARGET_AVX512VL)
41236 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41237 else
41238 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41239 break;
41240 case V8SImode:
41241 if (TARGET_AVX512VL)
41242 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41243 else
41244 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41245 break;
41246 case V8DFmode:
41247 if (TARGET_AVX512F)
41248 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41249 else
41250 return NULL_TREE;
41251 break;
41252 case V8DImode:
41253 if (TARGET_AVX512F)
41254 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41255 else
41256 return NULL_TREE;
41257 break;
41258 case V16SFmode:
41259 if (TARGET_AVX512F)
41260 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41261 else
41262 return NULL_TREE;
41263 break;
41264 case V16SImode:
41265 if (TARGET_AVX512F)
41266 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41267 else
41268 return NULL_TREE;
41269 break;
41270 default:
41271 return NULL_TREE;
41274 return ix86_get_builtin (code);
41277 /* Return the decl of a target-specific builtin that implements the
41278 reciprocal of the function FN, or NULL_TREE if it is not available. */
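/* Roughly, with -ffast-math style flags (finite math only, no trapping
   math, unsafe optimizations), SSE math and no size optimization, sqrtf
   maps to the rsqrtf builtin and the packed sqrtps builtins map to their
   rsqrtps counterparts.  */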
41280 static tree
41281 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41283 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41284 && flag_finite_math_only && !flag_trapping_math
41285 && flag_unsafe_math_optimizations))
41286 return NULL_TREE;
41288 if (md_fn)
41289 /* Machine dependent builtins. */
41290 switch (fn)
41292 /* Vectorized version of sqrt to rsqrt conversion. */
41293 case IX86_BUILTIN_SQRTPS_NR:
41294 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41296 case IX86_BUILTIN_SQRTPS_NR256:
41297 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41299 default:
41300 return NULL_TREE;
41302 else
41303 /* Normal builtins. */
41304 switch (fn)
41306 /* Sqrt to rsqrt conversion. */
41307 case BUILT_IN_SQRTF:
41308 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41310 default:
41311 return NULL_TREE;
41315 /* Helper for avx_vpermilps256_operand et al. This is also used by
41316 the expansion functions to turn the parallel back into a mask.
41317 The return value is 0 for no match and the imm8+1 for a match. */
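/* For example, in V4SFmode the parallel [1 0 3 2] (swap within each pair)
   encodes the imm8 0xb1, so this routine returns 0xb2.  */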
41320 avx_vpermilp_parallel (rtx par, machine_mode mode)
41322 unsigned i, nelt = GET_MODE_NUNITS (mode);
41323 unsigned mask = 0;
41324 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41326 if (XVECLEN (par, 0) != (int) nelt)
41327 return 0;
41329 /* Validate that all of the elements are constants, and not totally
41330 out of range. Copy the data into an integral array to make the
41331 subsequent checks easier. */
41332 for (i = 0; i < nelt; ++i)
41334 rtx er = XVECEXP (par, 0, i);
41335 unsigned HOST_WIDE_INT ei;
41337 if (!CONST_INT_P (er))
41338 return 0;
41339 ei = INTVAL (er);
41340 if (ei >= nelt)
41341 return 0;
41342 ipar[i] = ei;
41345 switch (mode)
41347 case V8DFmode:
41348 /* In the 512-bit DFmode case, we can only move elements within
41349 a 128-bit lane. First fill the second part of the mask,
41350 then fallthru. */
41351 for (i = 4; i < 6; ++i)
41353 if (ipar[i] < 4 || ipar[i] >= 6)
41354 return 0;
41355 mask |= (ipar[i] - 4) << i;
41357 for (i = 6; i < 8; ++i)
41359 if (ipar[i] < 6)
41360 return 0;
41361 mask |= (ipar[i] - 6) << i;
41363 /* FALLTHRU */
41365 case V4DFmode:
41366 /* In the 256-bit DFmode case, we can only move elements within
41367 a 128-bit lane. */
41368 for (i = 0; i < 2; ++i)
41370 if (ipar[i] >= 2)
41371 return 0;
41372 mask |= ipar[i] << i;
41374 for (i = 2; i < 4; ++i)
41376 if (ipar[i] < 2)
41377 return 0;
41378 mask |= (ipar[i] - 2) << i;
41380 break;
41382 case V16SFmode:
41383 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41384 must mirror the permutation in the lower 256 bits. */
41385 for (i = 0; i < 8; ++i)
41386 if (ipar[i] + 8 != ipar[i + 8])
41387 return 0;
41388 /* FALLTHRU */
41390 case V8SFmode:
41391 /* In the 256-bit SFmode case, we have full freedom of
41392 movement within the low 128-bit lane, but the high 128-bit
41393 lane must mirror the exact same pattern. */
41394 for (i = 0; i < 4; ++i)
41395 if (ipar[i] + 4 != ipar[i + 4])
41396 return 0;
41397 nelt = 4;
41398 /* FALLTHRU */
41400 case V2DFmode:
41401 case V4SFmode:
41402 /* In the 128-bit case, we've full freedom in the placement of
41403 the elements from the source operand. */
41404 for (i = 0; i < nelt; ++i)
41405 mask |= ipar[i] << (i * (nelt / 2));
41406 break;
41408 default:
41409 gcc_unreachable ();
41412 /* Make sure success has a non-zero value by adding one. */
41413 return mask + 1;
41416 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41417 the expansion functions to turn the parallel back into a mask.
41418 The return value is 0 for no match and the imm8+1 for a match. */
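/* For example, in V4DFmode the parallel [2 3 4 5] (high lane of the first
   operand followed by the low lane of the second) encodes imm8 0x21, so
   this routine returns 0x22.  */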
41421 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41423 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41424 unsigned mask = 0;
41425 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41427 if (XVECLEN (par, 0) != (int) nelt)
41428 return 0;
41430 /* Validate that all of the elements are constants, and not totally
41431 out of range. Copy the data into an integral array to make the
41432 subsequent checks easier. */
41433 for (i = 0; i < nelt; ++i)
41435 rtx er = XVECEXP (par, 0, i);
41436 unsigned HOST_WIDE_INT ei;
41438 if (!CONST_INT_P (er))
41439 return 0;
41440 ei = INTVAL (er);
41441 if (ei >= 2 * nelt)
41442 return 0;
41443 ipar[i] = ei;
41446 /* Validate that each half of the permute is a run of consecutive elements. */
41447 for (i = 0; i < nelt2 - 1; ++i)
41448 if (ipar[i] + 1 != ipar[i + 1])
41449 return 0;
41450 for (i = nelt2; i < nelt - 1; ++i)
41451 if (ipar[i] + 1 != ipar[i + 1])
41452 return 0;
41454 /* Reconstruct the mask. */
41455 for (i = 0; i < 2; ++i)
41457 unsigned e = ipar[i * nelt2];
41458 if (e % nelt2)
41459 return 0;
41460 e /= nelt2;
41461 mask |= e << (i * 4);
41464 /* Make sure success has a non-zero value by adding one. */
41465 return mask + 1;
41468 /* Return a register priority for hard reg REGNO. */
41469 static int
41470 ix86_register_priority (int hard_regno)
41472 /* ebp and r13 as the base always want a displacement, and r12 as the
41473 base always wants an index, so discourage their use in an
41474 address. */
41475 if (hard_regno == R12_REG || hard_regno == R13_REG)
41476 return 0;
41477 if (hard_regno == BP_REG)
41478 return 1;
41479 /* New x86-64 int registers result in bigger code size. Discourage
41480 them. */
41481 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41482 return 2;
41483 /* New x86-64 SSE registers result in bigger code size. Discourage
41484 them. */
41485 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41486 return 2;
41487 /* Usage of AX register results in smaller code. Prefer it. */
41488 if (hard_regno == AX_REG)
41489 return 4;
41490 return 3;
41493 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41495 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41496 QImode must go into class Q_REGS.
41497 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41498 movdf to do mem-to-mem moves through integer regs. */
41500 static reg_class_t
41501 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41503 machine_mode mode = GET_MODE (x);
41505 /* We're only allowed to return a subclass of CLASS. Many of the
41506 following checks fail for NO_REGS, so eliminate that early. */
41507 if (regclass == NO_REGS)
41508 return NO_REGS;
41510 /* All classes can load zeros. */
41511 if (x == CONST0_RTX (mode))
41512 return regclass;
41514 /* Force constants into memory if we are loading a (nonzero) constant into
41515 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41516 instructions to load from a constant. */
41517 if (CONSTANT_P (x)
41518 && (MAYBE_MMX_CLASS_P (regclass)
41519 || MAYBE_SSE_CLASS_P (regclass)
41520 || MAYBE_MASK_CLASS_P (regclass)))
41521 return NO_REGS;
41523 /* Prefer SSE regs only, if we can use them for math. */
41524 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41525 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41527 /* Floating-point constants need more complex checks. */
41528 if (CONST_DOUBLE_P (x))
41530 /* General regs can load everything. */
41531 if (reg_class_subset_p (regclass, GENERAL_REGS))
41532 return regclass;
41534 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41535 zero above. We only want to wind up preferring 80387 registers if
41536 we plan on doing computation with them. */
41537 if (TARGET_80387
41538 && standard_80387_constant_p (x) > 0)
41540 /* Limit class to non-sse. */
41541 if (regclass == FLOAT_SSE_REGS)
41542 return FLOAT_REGS;
41543 if (regclass == FP_TOP_SSE_REGS)
41544 return FP_TOP_REG;
41545 if (regclass == FP_SECOND_SSE_REGS)
41546 return FP_SECOND_REG;
41547 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41548 return regclass;
41551 return NO_REGS;
41554 /* Generally when we see PLUS here, it's the function invariant
41555 (plus soft-fp const_int), which can only be computed into general
41556 regs. */
41557 if (GET_CODE (x) == PLUS)
41558 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41560 /* QImode constants are easy to load, but non-constant QImode data
41561 must go into Q_REGS. */
41562 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41564 if (reg_class_subset_p (regclass, Q_REGS))
41565 return regclass;
41566 if (reg_class_subset_p (Q_REGS, regclass))
41567 return Q_REGS;
41568 return NO_REGS;
41571 return regclass;
41574 /* Discourage putting floating-point values in SSE registers unless
41575 SSE math is being used, and likewise for the 387 registers. */
41576 static reg_class_t
41577 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41579 machine_mode mode = GET_MODE (x);
41581 /* Restrict the output reload class to the register bank that we are doing
41582 math on. If we would like not to return a subset of CLASS, reject this
41583 alternative: if reload cannot do this, it will still use its choice. */
41584 mode = GET_MODE (x);
41585 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41586 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41588 if (X87_FLOAT_MODE_P (mode))
41590 if (regclass == FP_TOP_SSE_REGS)
41591 return FP_TOP_REG;
41592 else if (regclass == FP_SECOND_SSE_REGS)
41593 return FP_SECOND_REG;
41594 else
41595 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41598 return regclass;
41601 static reg_class_t
41602 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41603 machine_mode mode, secondary_reload_info *sri)
41605 /* Double-word spills from general registers to non-offsettable memory
41606 references (zero-extended addresses) require special handling. */
41607 if (TARGET_64BIT
41608 && MEM_P (x)
41609 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41610 && INTEGER_CLASS_P (rclass)
41611 && !offsettable_memref_p (x))
41613 sri->icode = (in_p
41614 ? CODE_FOR_reload_noff_load
41615 : CODE_FOR_reload_noff_store);
41616 /* Add the cost of moving address to a temporary. */
41617 sri->extra_cost = 1;
41619 return NO_REGS;
41622 /* QImode spills from non-QI registers require an
41623 intermediate register on 32-bit targets. */
41624 if (mode == QImode
41625 && (MAYBE_MASK_CLASS_P (rclass)
41626 || (!TARGET_64BIT && !in_p
41627 && INTEGER_CLASS_P (rclass)
41628 && MAYBE_NON_Q_CLASS_P (rclass))))
41630 int regno;
41632 if (REG_P (x))
41633 regno = REGNO (x);
41634 else
41635 regno = -1;
41637 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41638 regno = true_regnum (x);
41640 /* Return Q_REGS if the operand is in memory. */
41641 if (regno == -1)
41642 return Q_REGS;
41645 /* This condition handles the corner case where an expression involving
41646 pointers gets vectorized. We're trying to use the address of a
41647 stack slot as a vector initializer.
41649 (set (reg:V2DI 74 [ vect_cst_.2 ])
41650 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41652 Eventually frame gets turned into sp+offset like this:
41654 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41655 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41656 (const_int 392 [0x188]))))
41658 That later gets turned into:
41660 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41661 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41662 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41664 We'll have the following reload recorded:
41666 Reload 0: reload_in (DI) =
41667 (plus:DI (reg/f:DI 7 sp)
41668 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41669 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41670 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41671 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41672 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41673 reload_reg_rtx: (reg:V2DI 22 xmm1)
41675 Which isn't going to work, since SSE instructions can't handle scalar
41676 additions. Returning GENERAL_REGS forces the addition into an integer
41677 register, and reload can handle subsequent reloads without problems. */
41679 if (in_p && GET_CODE (x) == PLUS
41680 && SSE_CLASS_P (rclass)
41681 && SCALAR_INT_MODE_P (mode))
41682 return GENERAL_REGS;
41684 return NO_REGS;
41687 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41689 static bool
41690 ix86_class_likely_spilled_p (reg_class_t rclass)
41692 switch (rclass)
41694 case AREG:
41695 case DREG:
41696 case CREG:
41697 case BREG:
41698 case AD_REGS:
41699 case SIREG:
41700 case DIREG:
41701 case SSE_FIRST_REG:
41702 case FP_TOP_REG:
41703 case FP_SECOND_REG:
41704 case BND_REGS:
41705 return true;
41707 default:
41708 break;
41711 return false;
41714 /* If we are copying between general and FP registers, we need a memory
41715 location. The same is true for SSE and MMX registers.
41717 To optimize register_move_cost performance, allow the inline variant.
41719 The macro can't work reliably when one of the CLASSES is a class containing
41720 registers from multiple units (SSE, MMX, integer). We avoid this by never
41721 combining those units in a single alternative in the machine description.
41722 Ensure that this constraint holds to avoid unexpected surprises.
41724 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41725 enforce these sanity checks. */
41727 static inline bool
41728 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41729 machine_mode mode, int strict)
41731 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41732 return false;
41733 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41734 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41735 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41736 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41737 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41738 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41740 gcc_assert (!strict || lra_in_progress);
41741 return true;
41744 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41745 return true;
41747 /* Between mask and general registers, moves are no larger than word size. */
41748 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41749 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41750 return true;
41752 /* ??? This is a lie. We do have moves between mmx/general, and for
41753 mmx/sse2. But by saying we need secondary memory we discourage the
41754 register allocator from using the mmx registers unless needed. */
41755 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41756 return true;
41758 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41760 /* SSE1 doesn't have any direct moves from other classes. */
41761 if (!TARGET_SSE2)
41762 return true;
41764 /* If the target says that inter-unit moves are more expensive
41765 than moving through memory, then don't generate them. */
41766 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41767 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41768 return true;
41770 /* Between SSE and general registers, moves are no larger than word size. */
41771 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41772 return true;
41775 return false;
41778 bool
41779 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41780 machine_mode mode, int strict)
41782 return inline_secondary_memory_needed (class1, class2, mode, strict);
41785 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41787 On the 80386, this is the size of MODE in words,
41788 except in the FP regs, where a single reg is always enough. */
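/* For example, XFmode needs three registers of a general-register class
   on a 32-bit target (two on a 64-bit target), but only a single x87
   register.  */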
41790 static unsigned char
41791 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41793 if (MAYBE_INTEGER_CLASS_P (rclass))
41795 if (mode == XFmode)
41796 return (TARGET_64BIT ? 2 : 3);
41797 else if (mode == XCmode)
41798 return (TARGET_64BIT ? 4 : 6);
41799 else
41800 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41802 else
41804 if (COMPLEX_MODE_P (mode))
41805 return 2;
41806 else
41807 return 1;
41811 /* Return true if the registers in CLASS cannot represent the change from
41812 modes FROM to TO. */
41814 bool
41815 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41816 enum reg_class regclass)
41818 if (from == to)
41819 return false;
41821 /* x87 registers can't do subreg at all, as all values are reformatted
41822 to extended precision. */
41823 if (MAYBE_FLOAT_CLASS_P (regclass))
41824 return true;
41826 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41828 /* Vector registers do not support QI or HImode loads. If we don't
41829 disallow a change to these modes, reload will assume it's ok to
41830 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41831 the vec_dupv4hi pattern. */
41832 if (GET_MODE_SIZE (from) < 4)
41833 return true;
41836 return false;
41839 /* Return the cost of moving data of mode M between a
41840 register and memory. A value of 2 is the default; this cost is
41841 relative to those in `REGISTER_MOVE_COST'.
41843 This function is used extensively by register_move_cost that is used to
41844 build tables at startup. Make it inline in this case.
41845 When IN is 2, return maximum of in and out move cost.
41847 If moving between registers and memory is more expensive than
41848 between two registers, you should define this macro to express the
41849 relative cost.
41851 Also model the increased cost of moving QImode registers in
41852 non-Q_REGS classes.
41854 static inline int
41855 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41856 int in)
41858 int cost;
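/* A sketch of the lookup below: an SFmode value in FLOAT_REGS costs
   fp_load[0] / fp_store[0], a 16-byte vector in an SSE class costs
   sse_load[2] / sse_store[2], and unexpected sizes fall back to an
   arbitrary cost of 100.  */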
41859 if (FLOAT_CLASS_P (regclass))
41861 int index;
41862 switch (mode)
41864 case SFmode:
41865 index = 0;
41866 break;
41867 case DFmode:
41868 index = 1;
41869 break;
41870 case XFmode:
41871 index = 2;
41872 break;
41873 default:
41874 return 100;
41876 if (in == 2)
41877 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41878 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41880 if (SSE_CLASS_P (regclass))
41882 int index;
41883 switch (GET_MODE_SIZE (mode))
41885 case 4:
41886 index = 0;
41887 break;
41888 case 8:
41889 index = 1;
41890 break;
41891 case 16:
41892 index = 2;
41893 break;
41894 default:
41895 return 100;
41897 if (in == 2)
41898 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41899 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41901 if (MMX_CLASS_P (regclass))
41903 int index;
41904 switch (GET_MODE_SIZE (mode))
41906 case 4:
41907 index = 0;
41908 break;
41909 case 8:
41910 index = 1;
41911 break;
41912 default:
41913 return 100;
41915 if (in)
41916 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41917 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41919 switch (GET_MODE_SIZE (mode))
41921 case 1:
41922 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41924 if (!in)
41925 return ix86_cost->int_store[0];
41926 if (TARGET_PARTIAL_REG_DEPENDENCY
41927 && optimize_function_for_speed_p (cfun))
41928 cost = ix86_cost->movzbl_load;
41929 else
41930 cost = ix86_cost->int_load[0];
41931 if (in == 2)
41932 return MAX (cost, ix86_cost->int_store[0]);
41933 return cost;
41935 else
41937 if (in == 2)
41938 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41939 if (in)
41940 return ix86_cost->movzbl_load;
41941 else
41942 return ix86_cost->int_store[0] + 4;
41944 break;
41945 case 2:
41946 if (in == 2)
41947 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41948 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41949 default:
41950 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41951 if (mode == TFmode)
41952 mode = XFmode;
41953 if (in == 2)
41954 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41955 else if (in)
41956 cost = ix86_cost->int_load[2];
41957 else
41958 cost = ix86_cost->int_store[2];
41959 return (cost * (((int) GET_MODE_SIZE (mode)
41960 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41964 static int
41965 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41966 bool in)
41968 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41972 /* Return the cost of moving data from a register in class CLASS1 to
41973 one in class CLASS2.
41975 It is not required that the cost always equal 2 when FROM is the same as TO;
41976 on some machines it is expensive to move between registers if they are not
41977 general registers. */
41979 static int
41980 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41981 reg_class_t class2_i)
41983 enum reg_class class1 = (enum reg_class) class1_i;
41984 enum reg_class class2 = (enum reg_class) class2_i;
41986 /* If we require secondary memory, compute the cost of the store followed
41987 by the load. To avoid bad register allocation choices, we need
41988 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41990 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41992 int cost = 1;
41994 cost += inline_memory_move_cost (mode, class1, 2);
41995 cost += inline_memory_move_cost (mode, class2, 2);
41997 /* When copying from a general-purpose register we may emit multiple
41998 stores followed by a single load, causing a memory size mismatch stall.
41999 Count this as an arbitrarily high cost of 20. */
42000 if (targetm.class_max_nregs (class1, mode)
42001 > targetm.class_max_nregs (class2, mode))
42002 cost += 20;
42004 /* In the case of FP/MMX moves, the registers actually overlap, and we
42005 have to switch modes in order to treat them differently. */
42006 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
42007 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
42008 cost += 20;
42010 return cost;
42013 /* Moves between SSE/MMX and integer unit are expensive. */
42014 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
42015 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
42017 /* ??? By keeping the returned value relatively high, we limit the number
42018 of moves between integer and MMX/SSE registers for all targets.
42019 Additionally, a high value prevents problems with x86_modes_tieable_p (),
42020 where integer modes in MMX/SSE registers are not tieable
42021 because of missing QImode and HImode moves to, from, or between
42022 MMX/SSE registers. */
42023 return MAX (8, ix86_cost->mmxsse_to_integer);
42025 if (MAYBE_FLOAT_CLASS_P (class1))
42026 return ix86_cost->fp_move;
42027 if (MAYBE_SSE_CLASS_P (class1))
42028 return ix86_cost->sse_move;
42029 if (MAYBE_MMX_CLASS_P (class1))
42030 return ix86_cost->mmx_move;
42031 return 2;
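/* Worked example (illustrative, not part of the original source): copying a
   DFmode value between the x87 and SSE register files needs secondary
   memory, so the code above prices it as 1 plus the worst-case x87
   load/store cost plus the worst-case SSE load/store cost for an 8-byte
   value, whereas a plain GPR-to-GPR copy falls through to the final
   return and costs 2.  */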
42034 /* Return TRUE if hard register REGNO can hold a value of machine-mode
42035 MODE. */
42037 bool
42038 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
42040 /* Flags and only flags can only hold CCmode values. */
42041 if (CC_REGNO_P (regno))
42042 return GET_MODE_CLASS (mode) == MODE_CC;
42043 if (GET_MODE_CLASS (mode) == MODE_CC
42044 || GET_MODE_CLASS (mode) == MODE_RANDOM
42045 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
42046 return false;
42047 if (STACK_REGNO_P (regno))
42048 return VALID_FP_MODE_P (mode);
42049 if (MASK_REGNO_P (regno))
42050 return (VALID_MASK_REG_MODE (mode)
42051 || (TARGET_AVX512BW
42052 && VALID_MASK_AVX512BW_MODE (mode)));
42053 if (BND_REGNO_P (regno))
42054 return VALID_BND_REG_MODE (mode);
42055 if (SSE_REGNO_P (regno))
42057 /* We implement the move patterns for all vector modes into and
42058 out of SSE registers, even when no operation instructions
42059 are available. */
42061 /* For AVX-512 we allow, regardless of regno:
42062 - XI mode
42063 - any 512-bit wide vector mode
42064 - any scalar mode. */
42065 if (TARGET_AVX512F
42066 && (mode == XImode
42067 || VALID_AVX512F_REG_MODE (mode)
42068 || VALID_AVX512F_SCALAR_MODE (mode)))
42069 return true;
42071 /* TODO check for QI/HI scalars. */
42072 /* AVX512VL allows SSE registers 16+ for 128/256-bit modes. */
42073 if (TARGET_AVX512VL
42074 && (mode == OImode
42075 || mode == TImode
42076 || VALID_AVX256_REG_MODE (mode)
42077 || VALID_AVX512VL_128_REG_MODE (mode)))
42078 return true;
42080 /* xmm16-xmm31 are only available for AVX-512. */
42081 if (EXT_REX_SSE_REGNO_P (regno))
42082 return false;
42084 /* OImode and AVX modes are available only when AVX is enabled. */
42085 return ((TARGET_AVX
42086 && VALID_AVX256_REG_OR_OI_MODE (mode))
42087 || VALID_SSE_REG_MODE (mode)
42088 || VALID_SSE2_REG_MODE (mode)
42089 || VALID_MMX_REG_MODE (mode)
42090 || VALID_MMX_REG_MODE_3DNOW (mode));
42092 if (MMX_REGNO_P (regno))
42094 /* We implement the move patterns for 3DNOW modes even in MMX mode,
42095 so if the register is available at all, then we can move data of
42096 the given mode into or out of it. */
42097 return (VALID_MMX_REG_MODE (mode)
42098 || VALID_MMX_REG_MODE_3DNOW (mode));
42101 if (mode == QImode)
42103 /* Take care with QImode values - they can be in non-QI regs,
42104 but then they do cause partial register stalls. */
42105 if (ANY_QI_REGNO_P (regno))
42106 return true;
42107 if (!TARGET_PARTIAL_REG_STALL)
42108 return true;
42109 /* LRA checks if the hard register is OK for the given mode.
42110 QImode values can live in non-QI regs, so we allow all
42111 registers here. */
42112 if (lra_in_progress)
42113 return true;
42114 return !can_create_pseudo_p ();
42116 /* We handle both integer and floats in the general purpose registers. */
42117 else if (VALID_INT_MODE_P (mode))
42118 return true;
42119 else if (VALID_FP_MODE_P (mode))
42120 return true;
42121 else if (VALID_DFP_MODE_P (mode))
42122 return true;
42123 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
42124 on to use that value in smaller contexts, this can easily force a
42125 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
42126 supporting DImode, allow it. */
42127 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
42128 return true;
42130 return false;
42133 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
42134 tieable integer mode. */
42136 static bool
42137 ix86_tieable_integer_mode_p (machine_mode mode)
42139 switch (mode)
42141 case HImode:
42142 case SImode:
42143 return true;
42145 case QImode:
42146 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
42148 case DImode:
42149 return TARGET_64BIT;
42151 default:
42152 return false;
42156 /* Return true if MODE1 is accessible in a register that can hold MODE2
42157 without copying. That is, all register classes that can hold MODE2
42158 can also hold MODE1. */
42160 bool
42161 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
42163 if (mode1 == mode2)
42164 return true;
42166 if (ix86_tieable_integer_mode_p (mode1)
42167 && ix86_tieable_integer_mode_p (mode2))
42168 return true;
42170 /* MODE2 being XFmode implies fp stack or general regs, which means we
42171 can tie any smaller floating point modes to it. Note that we do not
42172 tie this with TFmode. */
42173 if (mode2 == XFmode)
42174 return mode1 == SFmode || mode1 == DFmode;
42176 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42177 that we can tie it with SFmode. */
42178 if (mode2 == DFmode)
42179 return mode1 == SFmode;
42181 /* If MODE2 is only appropriate for an SSE register, then tie with
42182 any other mode acceptable to SSE registers. */
42183 if (GET_MODE_SIZE (mode2) == 32
42184 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42185 return (GET_MODE_SIZE (mode1) == 32
42186 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42187 if (GET_MODE_SIZE (mode2) == 16
42188 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42189 return (GET_MODE_SIZE (mode1) == 16
42190 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42192 /* If MODE2 is appropriate for an MMX register, then tie
42193 with any other mode acceptable to MMX registers. */
42194 if (GET_MODE_SIZE (mode2) == 8
42195 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42196 return (GET_MODE_SIZE (mode1) == 8
42197 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42199 return false;
42202 /* Return the cost of moving between two registers of mode MODE. */
42204 static int
42205 ix86_set_reg_reg_cost (machine_mode mode)
42207 unsigned int units = UNITS_PER_WORD;
42209 switch (GET_MODE_CLASS (mode))
42211 default:
42212 break;
42214 case MODE_CC:
42215 units = GET_MODE_SIZE (CCmode);
42216 break;
42218 case MODE_FLOAT:
42219 if ((TARGET_SSE && mode == TFmode)
42220 || (TARGET_80387 && mode == XFmode)
42221 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42222 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42223 units = GET_MODE_SIZE (mode);
42224 break;
42226 case MODE_COMPLEX_FLOAT:
42227 if ((TARGET_SSE && mode == TCmode)
42228 || (TARGET_80387 && mode == XCmode)
42229 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42230 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42231 units = GET_MODE_SIZE (mode);
42232 break;
42234 case MODE_VECTOR_INT:
42235 case MODE_VECTOR_FLOAT:
42236 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42237 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42238 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42239 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42240 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42241 units = GET_MODE_SIZE (mode);
42244 /* Return the cost of moving between two registers of mode MODE,
42245 assuming that the move will be in pieces of at most UNITS bytes. */
42246 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
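/* Worked example (illustrative, not part of the original source): on a
   64-bit target with AVX enabled, a V8SFmode register copy is priced with
   UNITS == 32, i.e. COSTS_N_INSNS (1); if the 32-byte mode is not natively
   supported, UNITS stays at the word size and the same copy is priced as
   four word-sized moves, COSTS_N_INSNS (4).  */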
42249 /* Compute a (partial) cost for rtx X. Return true if the complete
42250 cost has been computed, and false if subexpressions should be
42251 scanned. In either case, *TOTAL contains the cost result. */
42253 static bool
42254 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42255 bool speed)
42257 rtx mask;
42258 enum rtx_code code = (enum rtx_code) code_i;
42259 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42260 machine_mode mode = GET_MODE (x);
42261 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42263 switch (code)
42265 case SET:
42266 if (register_operand (SET_DEST (x), VOIDmode)
42267 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42269 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42270 return true;
42272 return false;
42274 case CONST_INT:
42275 case CONST:
42276 case LABEL_REF:
42277 case SYMBOL_REF:
42278 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42279 *total = 3;
42280 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42281 *total = 2;
42282 else if (flag_pic && SYMBOLIC_CONST (x)
42283 && !(TARGET_64BIT
42284 && (GET_CODE (x) == LABEL_REF
42285 || (GET_CODE (x) == SYMBOL_REF
42286 && SYMBOL_REF_LOCAL_P (x))))
42287 /* Use 0 cost for CONST to improve its propagation. */
42288 && (TARGET_64BIT || GET_CODE (x) != CONST))
42289 *total = 1;
42290 else
42291 *total = 0;
42292 return true;
42294 case CONST_WIDE_INT:
42295 *total = 0;
42296 return true;
42298 case CONST_DOUBLE:
42299 switch (standard_80387_constant_p (x))
42301 case 1: /* 0.0 */
42302 *total = 1;
42303 return true;
42304 default: /* Other constants */
42305 *total = 2;
42306 return true;
42307 case 0:
42308 case -1:
42309 break;
42311 if (SSE_FLOAT_MODE_P (mode))
42313 case CONST_VECTOR:
42314 switch (standard_sse_constant_p (x))
42316 case 0:
42317 break;
42318 case 1: /* 0: xor eliminates false dependency */
42319 *total = 0;
42320 return true;
42321 default: /* -1: cmp contains false dependency */
42322 *total = 1;
42323 return true;
42326 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42327 it'll probably end up. Add a penalty for size. */
42328 *total = (COSTS_N_INSNS (1)
42329 + (flag_pic != 0 && !TARGET_64BIT)
42330 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42331 return true;
42333 case ZERO_EXTEND:
42334 /* Zero extension is often completely free on x86_64, so make
42335 it as cheap as possible. */
42336 if (TARGET_64BIT && mode == DImode
42337 && GET_MODE (XEXP (x, 0)) == SImode)
42338 *total = 1;
42339 else if (TARGET_ZERO_EXTEND_WITH_AND)
42340 *total = cost->add;
42341 else
42342 *total = cost->movzx;
42343 return false;
42345 case SIGN_EXTEND:
42346 *total = cost->movsx;
42347 return false;
42349 case ASHIFT:
42350 if (SCALAR_INT_MODE_P (mode)
42351 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42352 && CONST_INT_P (XEXP (x, 1)))
42354 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42355 if (value == 1)
42357 *total = cost->add;
42358 return false;
42360 if ((value == 2 || value == 3)
42361 && cost->lea <= cost->shift_const)
42363 *total = cost->lea;
42364 return false;
42367 /* FALLTHRU */
42369 case ROTATE:
42370 case ASHIFTRT:
42371 case LSHIFTRT:
42372 case ROTATERT:
42373 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42375 /* ??? Should be SSE vector operation cost. */
42376 /* At least for published AMD latencies, this really is the same
42377 as the latency for a simple fpu operation like fabs. */
42378 /* V*QImode is emulated with 1-11 insns. */
42379 if (mode == V16QImode || mode == V32QImode)
42381 int count = 11;
42382 if (TARGET_XOP && mode == V16QImode)
42384 /* For XOP we use vpshab, which requires a broadcast of the
42385 value to the variable shift insn.  For constants this
42386 means a V16QI constant in memory; even when we can perform the
42387 shift with one insn, set the cost to prefer paddb. */
42388 if (CONSTANT_P (XEXP (x, 1)))
42390 *total = (cost->fabs
42391 + rtx_cost (XEXP (x, 0), code, 0, speed)
42392 + (speed ? 2 : COSTS_N_BYTES (16)));
42393 return true;
42395 count = 3;
42397 else if (TARGET_SSSE3)
42398 count = 7;
42399 *total = cost->fabs * count;
42401 else
42402 *total = cost->fabs;
42404 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42406 if (CONST_INT_P (XEXP (x, 1)))
42408 if (INTVAL (XEXP (x, 1)) > 32)
42409 *total = cost->shift_const + COSTS_N_INSNS (2);
42410 else
42411 *total = cost->shift_const * 2;
42413 else
42415 if (GET_CODE (XEXP (x, 1)) == AND)
42416 *total = cost->shift_var * 2;
42417 else
42418 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42421 else
42423 if (CONST_INT_P (XEXP (x, 1)))
42424 *total = cost->shift_const;
42425 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42426 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42428 /* Return the cost after shift-and truncation. */
42429 *total = cost->shift_var;
42430 return true;
42432 else
42433 *total = cost->shift_var;
42435 return false;
42437 case FMA:
42439 rtx sub;
42441 gcc_assert (FLOAT_MODE_P (mode));
42442 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42444 /* ??? SSE scalar/vector cost should be used here. */
42445 /* ??? Bald assumption that fma has the same cost as fmul. */
42446 *total = cost->fmul;
42447 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42449 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42450 sub = XEXP (x, 0);
42451 if (GET_CODE (sub) == NEG)
42452 sub = XEXP (sub, 0);
42453 *total += rtx_cost (sub, FMA, 0, speed);
42455 sub = XEXP (x, 2);
42456 if (GET_CODE (sub) == NEG)
42457 sub = XEXP (sub, 0);
42458 *total += rtx_cost (sub, FMA, 2, speed);
42459 return true;
42462 case MULT:
42463 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42465 /* ??? SSE scalar cost should be used here. */
42466 *total = cost->fmul;
42467 return false;
42469 else if (X87_FLOAT_MODE_P (mode))
42471 *total = cost->fmul;
42472 return false;
42474 else if (FLOAT_MODE_P (mode))
42476 /* ??? SSE vector cost should be used here. */
42477 *total = cost->fmul;
42478 return false;
42480 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42482 /* V*QImode is emulated with 7-13 insns. */
42483 if (mode == V16QImode || mode == V32QImode)
42485 int extra = 11;
42486 if (TARGET_XOP && mode == V16QImode)
42487 extra = 5;
42488 else if (TARGET_SSSE3)
42489 extra = 6;
42490 *total = cost->fmul * 2 + cost->fabs * extra;
42492 /* V*DImode is emulated with 5-8 insns. */
42493 else if (mode == V2DImode || mode == V4DImode)
42495 if (TARGET_XOP && mode == V2DImode)
42496 *total = cost->fmul * 2 + cost->fabs * 3;
42497 else
42498 *total = cost->fmul * 3 + cost->fabs * 5;
42500 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42501 insns, including two PMULUDQ. */
42502 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42503 *total = cost->fmul * 2 + cost->fabs * 5;
42504 else
42505 *total = cost->fmul;
42506 return false;
42508 else
42510 rtx op0 = XEXP (x, 0);
42511 rtx op1 = XEXP (x, 1);
42512 int nbits;
42513 if (CONST_INT_P (XEXP (x, 1)))
42515 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42516 for (nbits = 0; value != 0; value &= value - 1)
42517 nbits++;
42519 else
42520 /* This is arbitrary. */
42521 nbits = 7;
42523 /* Compute costs correctly for widening multiplication. */
42524 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42525 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42526 == GET_MODE_SIZE (mode))
42528 int is_mulwiden = 0;
42529 machine_mode inner_mode = GET_MODE (op0);
42531 if (GET_CODE (op0) == GET_CODE (op1))
42532 is_mulwiden = 1, op1 = XEXP (op1, 0);
42533 else if (CONST_INT_P (op1))
42535 if (GET_CODE (op0) == SIGN_EXTEND)
42536 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42537 == INTVAL (op1);
42538 else
42539 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42542 if (is_mulwiden)
42543 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42546 *total = (cost->mult_init[MODE_INDEX (mode)]
42547 + nbits * cost->mult_bit
42548 + rtx_cost (op0, outer_code, opno, speed)
42549 + rtx_cost (op1, outer_code, opno, speed));
42551 return true;
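/* Worked example (illustrative, not part of the original source): a
   multiplication by the constant 10 (binary 1010) has two set bits, so
   NBITS == 2 and the cost computed above is
   mult_init[MODE_INDEX (mode)] + 2 * mult_bit plus the costs of the two
   operands.  */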
42554 case DIV:
42555 case UDIV:
42556 case MOD:
42557 case UMOD:
42558 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42559 /* ??? SSE cost should be used here. */
42560 *total = cost->fdiv;
42561 else if (X87_FLOAT_MODE_P (mode))
42562 *total = cost->fdiv;
42563 else if (FLOAT_MODE_P (mode))
42564 /* ??? SSE vector cost should be used here. */
42565 *total = cost->fdiv;
42566 else
42567 *total = cost->divide[MODE_INDEX (mode)];
42568 return false;
42570 case PLUS:
42571 if (GET_MODE_CLASS (mode) == MODE_INT
42572 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42574 if (GET_CODE (XEXP (x, 0)) == PLUS
42575 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42576 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42577 && CONSTANT_P (XEXP (x, 1)))
42579 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42580 if (val == 2 || val == 4 || val == 8)
42582 *total = cost->lea;
42583 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42584 outer_code, opno, speed);
42585 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42586 outer_code, opno, speed);
42587 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42588 return true;
42591 else if (GET_CODE (XEXP (x, 0)) == MULT
42592 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42594 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42595 if (val == 2 || val == 4 || val == 8)
42597 *total = cost->lea;
42598 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42599 outer_code, opno, speed);
42600 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42601 return true;
42604 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42606 *total = cost->lea;
42607 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42608 outer_code, opno, speed);
42609 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42610 outer_code, opno, speed);
42611 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42612 return true;
42615 /* FALLTHRU */
42617 case MINUS:
42618 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42620 /* ??? SSE cost should be used here. */
42621 *total = cost->fadd;
42622 return false;
42624 else if (X87_FLOAT_MODE_P (mode))
42626 *total = cost->fadd;
42627 return false;
42629 else if (FLOAT_MODE_P (mode))
42631 /* ??? SSE vector cost should be used here. */
42632 *total = cost->fadd;
42633 return false;
42635 /* FALLTHRU */
42637 case AND:
42638 case IOR:
42639 case XOR:
42640 if (GET_MODE_CLASS (mode) == MODE_INT
42641 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42643 *total = (cost->add * 2
42644 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42645 << (GET_MODE (XEXP (x, 0)) != DImode))
42646 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42647 << (GET_MODE (XEXP (x, 1)) != DImode)));
42648 return true;
42650 /* FALLTHRU */
42652 case NEG:
42653 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42655 /* ??? SSE cost should be used here. */
42656 *total = cost->fchs;
42657 return false;
42659 else if (X87_FLOAT_MODE_P (mode))
42661 *total = cost->fchs;
42662 return false;
42664 else if (FLOAT_MODE_P (mode))
42666 /* ??? SSE vector cost should be used here. */
42667 *total = cost->fchs;
42668 return false;
42670 /* FALLTHRU */
42672 case NOT:
42673 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42675 /* ??? Should be SSE vector operation cost. */
42676 /* At least for published AMD latencies, this really is the same
42677 as the latency for a simple fpu operation like fabs. */
42678 *total = cost->fabs;
42680 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42681 *total = cost->add * 2;
42682 else
42683 *total = cost->add;
42684 return false;
42686 case COMPARE:
42687 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42688 && XEXP (XEXP (x, 0), 1) == const1_rtx
42689 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42690 && XEXP (x, 1) == const0_rtx)
42692 /* This kind of construct is implemented using test[bwl].
42693 Treat it as if we had an AND. */
42694 *total = (cost->add
42695 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42696 + rtx_cost (const1_rtx, outer_code, opno, speed));
42697 return true;
42700 /* The embedded comparison operand is completely free. */
42701 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
42702 && XEXP (x, 1) == const0_rtx)
42703 *total = 0;
42705 return false;
42707 case FLOAT_EXTEND:
42708 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42709 *total = 0;
42710 return false;
42712 case ABS:
42713 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42714 /* ??? SSE cost should be used here. */
42715 *total = cost->fabs;
42716 else if (X87_FLOAT_MODE_P (mode))
42717 *total = cost->fabs;
42718 else if (FLOAT_MODE_P (mode))
42719 /* ??? SSE vector cost should be used here. */
42720 *total = cost->fabs;
42721 return false;
42723 case SQRT:
42724 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42725 /* ??? SSE cost should be used here. */
42726 *total = cost->fsqrt;
42727 else if (X87_FLOAT_MODE_P (mode))
42728 *total = cost->fsqrt;
42729 else if (FLOAT_MODE_P (mode))
42730 /* ??? SSE vector cost should be used here. */
42731 *total = cost->fsqrt;
42732 return false;
42734 case UNSPEC:
42735 if (XINT (x, 1) == UNSPEC_TP)
42736 *total = 0;
42737 return false;
42739 case VEC_SELECT:
42740 case VEC_CONCAT:
42741 case VEC_DUPLICATE:
42742 /* ??? Assume all of these vector manipulation patterns are
42743 recognizable, in which case they all pretty much have the
42744 same cost. */
42745 *total = cost->fabs;
42746 return true;
42747 case VEC_MERGE:
42748 mask = XEXP (x, 2);
42749 /* This is a masked instruction; assume the same cost
42750 as the nonmasked variant. */
42751 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42752 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42753 else
42754 *total = cost->fabs;
42755 return true;
42757 default:
42758 return false;
42762 #if TARGET_MACHO
42764 static int current_machopic_label_num;
42766 /* Given a symbol name and its associated stub, write out the
42767 definition of the stub. */
42769 void
42770 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42772 unsigned int length;
42773 char *binder_name, *symbol_name, lazy_ptr_name[32];
42774 int label = ++current_machopic_label_num;
42776 /* For 64-bit we shouldn't get here. */
42777 gcc_assert (!TARGET_64BIT);
42779 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42780 symb = targetm.strip_name_encoding (symb);
42782 length = strlen (stub);
42783 binder_name = XALLOCAVEC (char, length + 32);
42784 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42786 length = strlen (symb);
42787 symbol_name = XALLOCAVEC (char, length + 32);
42788 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42790 sprintf (lazy_ptr_name, "L%d$lz", label);
42792 if (MACHOPIC_ATT_STUB)
42793 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42794 else if (MACHOPIC_PURE)
42795 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42796 else
42797 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42799 fprintf (file, "%s:\n", stub);
42800 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42802 if (MACHOPIC_ATT_STUB)
42804 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42806 else if (MACHOPIC_PURE)
42808 /* PIC stub. */
42809 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42810 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42811 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42812 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42813 label, lazy_ptr_name, label);
42814 fprintf (file, "\tjmp\t*%%ecx\n");
42816 else
42817 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42819 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42820 it needs no stub-binding-helper. */
42821 if (MACHOPIC_ATT_STUB)
42822 return;
42824 fprintf (file, "%s:\n", binder_name);
42826 if (MACHOPIC_PURE)
42828 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42829 fprintf (file, "\tpushl\t%%ecx\n");
42831 else
42832 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42834 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42836 /* N.B. Keep the correspondence of these
42837 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42838 old-pic/new-pic/non-pic stubs; altering this will break
42839 compatibility with existing dylibs. */
42840 if (MACHOPIC_PURE)
42842 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42843 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42845 else
42846 /* 16-byte -mdynamic-no-pic stub. */
42847 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
42849 fprintf (file, "%s:\n", lazy_ptr_name);
42850 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42851 fprintf (file, ASM_LONG "%s\n", binder_name);
42853 #endif /* TARGET_MACHO */
42855 /* Order the registers for the register allocator. */
42857 void
42858 x86_order_regs_for_local_alloc (void)
42860 int pos = 0;
42861 int i;
42863 /* First allocate the local general purpose registers. */
42864 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42865 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42866 reg_alloc_order [pos++] = i;
42868 /* Global general purpose registers. */
42869 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42870 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42871 reg_alloc_order [pos++] = i;
42873 /* x87 registers come first in case we are doing FP math
42874 using them. */
42875 if (!TARGET_SSE_MATH)
42876 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42877 reg_alloc_order [pos++] = i;
42879 /* SSE registers. */
42880 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42881 reg_alloc_order [pos++] = i;
42882 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42883 reg_alloc_order [pos++] = i;
42885 /* Extended REX SSE registers. */
42886 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42887 reg_alloc_order [pos++] = i;
42889 /* Mask register. */
42890 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42891 reg_alloc_order [pos++] = i;
42893 /* MPX bound registers. */
42894 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42895 reg_alloc_order [pos++] = i;
42897 /* x87 registers. */
42898 if (TARGET_SSE_MATH)
42899 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42900 reg_alloc_order [pos++] = i;
42902 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42903 reg_alloc_order [pos++] = i;
42905 /* Initialize the rest of the array, as we do not allocate some registers
42906 at all. */
42907 while (pos < FIRST_PSEUDO_REGISTER)
42908 reg_alloc_order [pos++] = 0;
42911 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42912 in struct attribute_spec.handler. */
42913 static tree
42914 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42915 tree args,
42916 int,
42917 bool *no_add_attrs)
42919 if (TREE_CODE (*node) != FUNCTION_TYPE
42920 && TREE_CODE (*node) != METHOD_TYPE
42921 && TREE_CODE (*node) != FIELD_DECL
42922 && TREE_CODE (*node) != TYPE_DECL)
42924 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42925 name);
42926 *no_add_attrs = true;
42927 return NULL_TREE;
42929 if (TARGET_64BIT)
42931 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42932 name);
42933 *no_add_attrs = true;
42934 return NULL_TREE;
42936 if (is_attribute_p ("callee_pop_aggregate_return", name))
42938 tree cst;
42940 cst = TREE_VALUE (args);
42941 if (TREE_CODE (cst) != INTEGER_CST)
42943 warning (OPT_Wattributes,
42944 "%qE attribute requires an integer constant argument",
42945 name);
42946 *no_add_attrs = true;
42948 else if (compare_tree_int (cst, 0) != 0
42949 && compare_tree_int (cst, 1) != 0)
42951 warning (OPT_Wattributes,
42952 "argument to %qE attribute is neither zero, nor one",
42953 name);
42954 *no_add_attrs = true;
42957 return NULL_TREE;
42960 return NULL_TREE;
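/* Usage sketch (illustrative, not part of the original source; the names
   are hypothetical): on 32-bit targets the attribute selects whether the
   callee pops the hidden pointer used for returning an aggregate, e.g.

       struct big { int a[8]; };
       struct big get_big (void)
         __attribute__ ((callee_pop_aggregate_return (1)));

   The handler above rejects 64-bit targets and any argument other than
   0 or 1.  */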
42963 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42964 struct attribute_spec.handler. */
42965 static tree
42966 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42967 bool *no_add_attrs)
42969 if (TREE_CODE (*node) != FUNCTION_TYPE
42970 && TREE_CODE (*node) != METHOD_TYPE
42971 && TREE_CODE (*node) != FIELD_DECL
42972 && TREE_CODE (*node) != TYPE_DECL)
42974 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42975 name);
42976 *no_add_attrs = true;
42977 return NULL_TREE;
42980 /* Can combine regparm with all attributes but fastcall. */
42981 if (is_attribute_p ("ms_abi", name))
42983 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42985 error ("ms_abi and sysv_abi attributes are not compatible");
42988 return NULL_TREE;
42990 else if (is_attribute_p ("sysv_abi", name))
42992 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42994 error ("ms_abi and sysv_abi attributes are not compatible");
42997 return NULL_TREE;
43000 return NULL_TREE;
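/* Usage sketch (illustrative, not part of the original source; the names
   are hypothetical): the two calling-convention attributes are mutually
   exclusive on a given declaration, e.g.

       int f (int) __attribute__ ((ms_abi));
       int g (int) __attribute__ ((sysv_abi));

   Combining both on the same function type triggers the error above.  */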
43003 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
43004 struct attribute_spec.handler. */
43005 static tree
43006 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
43007 bool *no_add_attrs)
43009 tree *type = NULL;
43010 if (DECL_P (*node))
43012 if (TREE_CODE (*node) == TYPE_DECL)
43013 type = &TREE_TYPE (*node);
43015 else
43016 type = node;
43018 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
43020 warning (OPT_Wattributes, "%qE attribute ignored",
43021 name);
43022 *no_add_attrs = true;
43025 else if ((is_attribute_p ("ms_struct", name)
43026 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
43027 || ((is_attribute_p ("gcc_struct", name)
43028 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
43030 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
43031 name);
43032 *no_add_attrs = true;
43035 return NULL_TREE;
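/* Usage sketch (illustrative, not part of the original source; the type
   name is hypothetical): the layout attributes apply to struct and union
   types, e.g.

       struct __attribute__ ((ms_struct)) S { char c; int i; };

   and the handler above warns when ms_struct and gcc_struct are combined
   on the same type.  */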
43038 static tree
43039 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
43040 bool *no_add_attrs)
43042 if (TREE_CODE (*node) != FUNCTION_DECL)
43044 warning (OPT_Wattributes, "%qE attribute only applies to functions",
43045 name);
43046 *no_add_attrs = true;
43048 return NULL_TREE;
43051 static bool
43052 ix86_ms_bitfield_layout_p (const_tree record_type)
43054 return ((TARGET_MS_BITFIELD_LAYOUT
43055 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
43056 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
43059 /* Returns an expression indicating where the this parameter is
43060 located on entry to the FUNCTION. */
43062 static rtx
43063 x86_this_parameter (tree function)
43065 tree type = TREE_TYPE (function);
43066 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
43067 int nregs;
43069 if (TARGET_64BIT)
43071 const int *parm_regs;
43073 if (ix86_function_type_abi (type) == MS_ABI)
43074 parm_regs = x86_64_ms_abi_int_parameter_registers;
43075 else
43076 parm_regs = x86_64_int_parameter_registers;
43077 return gen_rtx_REG (Pmode, parm_regs[aggr]);
43080 nregs = ix86_function_regparm (type, function);
43082 if (nregs > 0 && !stdarg_p (type))
43084 int regno;
43085 unsigned int ccvt = ix86_get_callcvt (type);
43087 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43088 regno = aggr ? DX_REG : CX_REG;
43089 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43091 regno = CX_REG;
43092 if (aggr)
43093 return gen_rtx_MEM (SImode,
43094 plus_constant (Pmode, stack_pointer_rtx, 4));
43096 else
43098 regno = AX_REG;
43099 if (aggr)
43101 regno = DX_REG;
43102 if (nregs == 1)
43103 return gen_rtx_MEM (SImode,
43104 plus_constant (Pmode,
43105 stack_pointer_rtx, 4));
43108 return gen_rtx_REG (SImode, regno);
43111 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
43112 aggr ? 8 : 4));
43115 /* Determine whether x86_output_mi_thunk can succeed. */
43117 static bool
43118 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
43119 const_tree function)
43121 /* 64-bit can handle anything. */
43122 if (TARGET_64BIT)
43123 return true;
43125 /* For 32-bit, everything's fine if we have one free register. */
43126 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
43127 return true;
43129 /* Need a free register for vcall_offset. */
43130 if (vcall_offset)
43131 return false;
43133 /* Need a free register for GOT references. */
43134 if (flag_pic && !targetm.binds_local_p (function))
43135 return false;
43137 /* Otherwise ok. */
43138 return true;
43141 /* Output the assembler code for a thunk function. THUNK_DECL is the
43142 declaration for the thunk function itself, FUNCTION is the decl for
43143 the target function. DELTA is an immediate constant offset to be
43144 added to THIS. If VCALL_OFFSET is nonzero, the word at
43145 *(*this + vcall_offset) should be added to THIS. */
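/* Rough sketch (illustrative, not part of the original source) of the
   adjustment the emitted thunk performs before tail-calling FUNCTION:

       this = (char *) this + delta;
       if (vcall_offset)
         this = (char *) this
                + *(ptrdiff_t *) (*(char **) this + vcall_offset);

   i.e. the vcall adjustment reads the slot VCALL_OFFSET bytes into the
   object's vtable and adds it to THIS.  */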
43147 static void
43148 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
43149 HOST_WIDE_INT vcall_offset, tree function)
43151 rtx this_param = x86_this_parameter (function);
43152 rtx this_reg, tmp, fnaddr;
43153 unsigned int tmp_regno;
43154 rtx_insn *insn;
43156 if (TARGET_64BIT)
43157 tmp_regno = R10_REG;
43158 else
43160 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
43161 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43162 tmp_regno = AX_REG;
43163 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43164 tmp_regno = DX_REG;
43165 else
43166 tmp_regno = CX_REG;
43169 emit_note (NOTE_INSN_PROLOGUE_END);
43171 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
43172 pull it in now and let DELTA benefit. */
43173 if (REG_P (this_param))
43174 this_reg = this_param;
43175 else if (vcall_offset)
43177 /* Put the this parameter into %eax. */
43178 this_reg = gen_rtx_REG (Pmode, AX_REG);
43179 emit_move_insn (this_reg, this_param);
43181 else
43182 this_reg = NULL_RTX;
43184 /* Adjust the this parameter by a fixed constant. */
43185 if (delta)
43187 rtx delta_rtx = GEN_INT (delta);
43188 rtx delta_dst = this_reg ? this_reg : this_param;
43190 if (TARGET_64BIT)
43192 if (!x86_64_general_operand (delta_rtx, Pmode))
43194 tmp = gen_rtx_REG (Pmode, tmp_regno);
43195 emit_move_insn (tmp, delta_rtx);
43196 delta_rtx = tmp;
43200 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43203 /* Adjust the this parameter by a value stored in the vtable. */
43204 if (vcall_offset)
43206 rtx vcall_addr, vcall_mem, this_mem;
43208 tmp = gen_rtx_REG (Pmode, tmp_regno);
43210 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43211 if (Pmode != ptr_mode)
43212 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43213 emit_move_insn (tmp, this_mem);
43215 /* Adjust the this parameter. */
43216 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43217 if (TARGET_64BIT
43218 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43220 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43221 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43222 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43225 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43226 if (Pmode != ptr_mode)
43227 emit_insn (gen_addsi_1_zext (this_reg,
43228 gen_rtx_REG (ptr_mode,
43229 REGNO (this_reg)),
43230 vcall_mem));
43231 else
43232 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43235 /* If necessary, drop THIS back to its stack slot. */
43236 if (this_reg && this_reg != this_param)
43237 emit_move_insn (this_param, this_reg);
43239 fnaddr = XEXP (DECL_RTL (function), 0);
43240 if (TARGET_64BIT)
43242 if (!flag_pic || targetm.binds_local_p (function)
43243 || TARGET_PECOFF)
43245 else
43247 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43248 tmp = gen_rtx_CONST (Pmode, tmp);
43249 fnaddr = gen_const_mem (Pmode, tmp);
43252 else
43254 if (!flag_pic || targetm.binds_local_p (function))
43256 #if TARGET_MACHO
43257 else if (TARGET_MACHO)
43259 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43260 fnaddr = XEXP (fnaddr, 0);
43262 #endif /* TARGET_MACHO */
43263 else
43265 tmp = gen_rtx_REG (Pmode, CX_REG);
43266 output_set_got (tmp, NULL_RTX);
43268 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43269 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43270 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43271 fnaddr = gen_const_mem (Pmode, fnaddr);
43275 /* Our sibling call patterns do not allow memories, because we have no
43276 predicate that can distinguish between frame and non-frame memory.
43277 For our purposes here, we can get away with (ab)using a jump pattern,
43278 because we're going to do no optimization. */
43279 if (MEM_P (fnaddr))
43281 if (sibcall_insn_operand (fnaddr, word_mode))
43283 fnaddr = XEXP (DECL_RTL (function), 0);
43284 tmp = gen_rtx_MEM (QImode, fnaddr);
43285 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43286 tmp = emit_call_insn (tmp);
43287 SIBLING_CALL_P (tmp) = 1;
43289 else
43290 emit_jump_insn (gen_indirect_jump (fnaddr));
43292 else
43294 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43296 /* CM_LARGE_PIC always uses a pseudo PIC register, which is
43297 uninitialized.  Since FUNCTION is local and calling it
43298 doesn't go through the PLT, we use scratch register %r11 as
43299 the PIC register and initialize it here. */
43300 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43301 ix86_init_large_pic_reg (tmp_regno);
43302 fnaddr = legitimize_pic_address (fnaddr,
43303 gen_rtx_REG (Pmode, tmp_regno));
43306 if (!sibcall_insn_operand (fnaddr, word_mode))
43308 tmp = gen_rtx_REG (word_mode, tmp_regno);
43309 if (GET_MODE (fnaddr) != word_mode)
43310 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43311 emit_move_insn (tmp, fnaddr);
43312 fnaddr = tmp;
43315 tmp = gen_rtx_MEM (QImode, fnaddr);
43316 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43317 tmp = emit_call_insn (tmp);
43318 SIBLING_CALL_P (tmp) = 1;
43320 emit_barrier ();
43322 /* Emit just enough of rest_of_compilation to get the insns emitted.
43323 Note that use_thunk calls assemble_start_function et al. */
43324 insn = get_insns ();
43325 shorten_branches (insn);
43326 final_start_function (insn, file, 1);
43327 final (insn, file, 1);
43328 final_end_function ();
43331 static void
43332 x86_file_start (void)
43334 default_file_start ();
43335 if (TARGET_16BIT)
43336 fputs ("\t.code16gcc\n", asm_out_file);
43337 #if TARGET_MACHO
43338 darwin_file_start ();
43339 #endif
43340 if (X86_FILE_START_VERSION_DIRECTIVE)
43341 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43342 if (X86_FILE_START_FLTUSED)
43343 fputs ("\t.global\t__fltused\n", asm_out_file);
43344 if (ix86_asm_dialect == ASM_INTEL)
43345 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43348 int
43349 x86_field_alignment (tree field, int computed)
43351 machine_mode mode;
43352 tree type = TREE_TYPE (field);
43354 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43355 return computed;
43356 if (TARGET_IAMCU)
43357 return iamcu_alignment (type, computed);
43358 mode = TYPE_MODE (strip_array_types (type));
43359 if (mode == DFmode || mode == DCmode
43360 || GET_MODE_CLASS (mode) == MODE_INT
43361 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43362 return MIN (32, computed);
43363 return computed;
43366 /* Print call to TARGET to FILE. */
43368 static void
43369 x86_print_call_or_nop (FILE *file, const char *target)
43371 if (flag_nop_mcount)
43372 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43373 else
43374 fprintf (file, "1:\tcall\t%s\n", target);
43377 /* Output assembler code to FILE to increment profiler label # LABELNO
43378 for profiling a function entry. */
43379 void
43380 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43382 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43383 : MCOUNT_NAME);
43384 if (TARGET_64BIT)
43386 #ifndef NO_PROFILE_COUNTERS
43387 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43388 #endif
43390 if (!TARGET_PECOFF && flag_pic)
43391 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43392 else
43393 x86_print_call_or_nop (file, mcount_name);
43395 else if (flag_pic)
43397 #ifndef NO_PROFILE_COUNTERS
43398 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43399 LPREFIX, labelno);
43400 #endif
43401 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43403 else
43405 #ifndef NO_PROFILE_COUNTERS
43406 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43407 LPREFIX, labelno);
43408 #endif
43409 x86_print_call_or_nop (file, mcount_name);
43412 if (flag_record_mcount)
43414 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43415 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43416 fprintf (file, "\t.previous\n");
43420 /* We don't have exact information about the insn sizes, but we may assume
43421 quite safely that we are informed about all 1 byte insns and memory
43422 address sizes. This is enough to eliminate unnecessary padding in
43423 99% of cases. */
43425 static int
43426 min_insn_size (rtx_insn *insn)
43428 int l = 0, len;
43430 if (!INSN_P (insn) || !active_insn_p (insn))
43431 return 0;
43433 /* Discard alignments we've emitted, and jump instructions. */
43434 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43435 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43436 return 0;
43438 /* Important case - calls are always 5 bytes.
43439 It is common to have many calls in a row. */
43440 if (CALL_P (insn)
43441 && symbolic_reference_mentioned_p (PATTERN (insn))
43442 && !SIBLING_CALL_P (insn))
43443 return 5;
43444 len = get_attr_length (insn);
43445 if (len <= 1)
43446 return 1;
43448 /* For normal instructions we rely on get_attr_length being exact,
43449 with a few exceptions. */
43450 if (!JUMP_P (insn))
43452 enum attr_type type = get_attr_type (insn);
43454 switch (type)
43456 case TYPE_MULTI:
43457 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43458 || asm_noperands (PATTERN (insn)) >= 0)
43459 return 0;
43460 break;
43461 case TYPE_OTHER:
43462 case TYPE_FCMP:
43463 break;
43464 default:
43465 /* Otherwise trust get_attr_length. */
43466 return len;
43469 l = get_attr_length_address (insn);
43470 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43471 l = 4;
43473 if (l)
43474 return 1+l;
43475 else
43476 return 2;
43479 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43481 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
43482 window. */
43484 static void
43485 ix86_avoid_jump_mispredicts (void)
43487 rtx_insn *insn, *start = get_insns ();
43488 int nbytes = 0, njumps = 0;
43489 bool isjump = false;
43491 /* Look for all minimal intervals of instructions containing 4 jumps.
43492 The intervals are bounded by START and INSN.  NBYTES is the total
43493 size of instructions in the interval including INSN and not including
43494 START.  When NBYTES is smaller than 16 bytes, it is possible
43495 that the ends of START and INSN fall in the same 16-byte page.
43497 The smallest offset in the page at which INSN can start is the case where
43498 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
43499 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
43501 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
43502 have to - control transfer to the label(s) can be performed through other
43503 means - and we also estimate the minimum length of all asm stmts as 0. */
43504 for (insn = start; insn; insn = NEXT_INSN (insn))
43506 int min_size;
43508 if (LABEL_P (insn))
43510 int align = label_to_alignment (insn);
43511 int max_skip = label_to_max_skip (insn);
43513 if (max_skip > 15)
43514 max_skip = 15;
43515 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43516 already in the current 16 byte page, because otherwise
43517 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43518 bytes to reach 16 byte boundary. */
43519 if (align <= 0
43520 || (align <= 3 && max_skip != (1 << align) - 1))
43521 max_skip = 0;
43522 if (dump_file)
43523 fprintf (dump_file, "Label %i with max_skip %i\n",
43524 INSN_UID (insn), max_skip);
43525 if (max_skip)
43527 while (nbytes + max_skip >= 16)
43529 start = NEXT_INSN (start);
43530 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43531 || CALL_P (start))
43532 njumps--, isjump = true;
43533 else
43534 isjump = false;
43535 nbytes -= min_insn_size (start);
43538 continue;
43541 min_size = min_insn_size (insn);
43542 nbytes += min_size;
43543 if (dump_file)
43544 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43545 INSN_UID (insn), min_size);
43546 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43547 || CALL_P (insn))
43548 njumps++;
43549 else
43550 continue;
43552 while (njumps > 3)
43554 start = NEXT_INSN (start);
43555 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43556 || CALL_P (start))
43557 njumps--, isjump = true;
43558 else
43559 isjump = false;
43560 nbytes -= min_insn_size (start);
43562 gcc_assert (njumps >= 0);
43563 if (dump_file)
43564 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43565 INSN_UID (start), INSN_UID (insn), nbytes);
43567 if (njumps == 3 && isjump && nbytes < 16)
43569 int padsize = 15 - nbytes + min_insn_size (insn);
43571 if (dump_file)
43572 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43573 INSN_UID (insn), padsize);
43574 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43578 #endif
43580 /* AMD Athlon works faster
43581 when RET is not the destination of a conditional jump or directly preceded
43582 by another jump instruction.  We avoid the penalty by inserting a NOP just
43583 before the RET instructions in such cases. */
43584 static void
43585 ix86_pad_returns (void)
43587 edge e;
43588 edge_iterator ei;
43590 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43592 basic_block bb = e->src;
43593 rtx_insn *ret = BB_END (bb);
43594 rtx_insn *prev;
43595 bool replace = false;
43597 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43598 || optimize_bb_for_size_p (bb))
43599 continue;
43600 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43601 if (active_insn_p (prev) || LABEL_P (prev))
43602 break;
43603 if (prev && LABEL_P (prev))
43605 edge e;
43606 edge_iterator ei;
43608 FOR_EACH_EDGE (e, ei, bb->preds)
43609 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43610 && !(e->flags & EDGE_FALLTHRU))
43612 replace = true;
43613 break;
43616 if (!replace)
43618 prev = prev_active_insn (ret);
43619 if (prev
43620 && ((JUMP_P (prev) && any_condjump_p (prev))
43621 || CALL_P (prev)))
43622 replace = true;
43623 /* Empty functions get a branch mispredict even when
43624 the jump destination is not visible to us. */
43625 if (!prev && !optimize_function_for_size_p (cfun))
43626 replace = true;
43628 if (replace)
43630 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43631 delete_insn (ret);
43636 /* Count the minimum number of instructions in BB. Return 4 if the
43637 number of instructions >= 4. */
43639 static int
43640 ix86_count_insn_bb (basic_block bb)
43642 rtx_insn *insn;
43643 int insn_count = 0;
43645 /* Count number of instructions in this block. Return 4 if the number
43646 of instructions >= 4. */
43647 FOR_BB_INSNS (bb, insn)
43649 /* This only happens in exit blocks. */
43650 if (JUMP_P (insn)
43651 && ANY_RETURN_P (PATTERN (insn)))
43652 break;
43654 if (NONDEBUG_INSN_P (insn)
43655 && GET_CODE (PATTERN (insn)) != USE
43656 && GET_CODE (PATTERN (insn)) != CLOBBER)
43658 insn_count++;
43659 if (insn_count >= 4)
43660 return insn_count;
43664 return insn_count;
43668 /* Count the minimum number of instructions in code path in BB.
43669 Return 4 if the number of instructions >= 4. */
43671 static int
43672 ix86_count_insn (basic_block bb)
43674 edge e;
43675 edge_iterator ei;
43676 int min_prev_count;
43678 /* Only bother counting instructions along paths with no
43679 more than 2 basic blocks between entry and exit. Given
43680 that BB has an edge to exit, determine if a predecessor
43681 of BB has an edge from entry. If so, compute the number
43682 of instructions in the predecessor block. If there
43683 happen to be multiple such blocks, compute the minimum. */
43684 min_prev_count = 4;
43685 FOR_EACH_EDGE (e, ei, bb->preds)
43687 edge prev_e;
43688 edge_iterator prev_ei;
43690 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43692 min_prev_count = 0;
43693 break;
43695 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43697 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43699 int count = ix86_count_insn_bb (e->src);
43700 if (count < min_prev_count)
43701 min_prev_count = count;
43702 break;
43707 if (min_prev_count < 4)
43708 min_prev_count += ix86_count_insn_bb (bb);
43710 return min_prev_count;
43713 /* Pad short function to 4 instructions. */
43715 static void
43716 ix86_pad_short_function (void)
43718 edge e;
43719 edge_iterator ei;
43721 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43723 rtx_insn *ret = BB_END (e->src);
43724 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43726 int insn_count = ix86_count_insn (e->src);
43728 /* Pad short function. */
43729 if (insn_count < 4)
43731 rtx_insn *insn = ret;
43733 /* Find epilogue. */
43734 while (insn
43735 && (!NOTE_P (insn)
43736 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43737 insn = PREV_INSN (insn);
43739 if (!insn)
43740 insn = ret;
43742 /* Two NOPs count as one instruction. */
43743 insn_count = 2 * (4 - insn_count);
43744 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43750 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43751 the epilogue, the Windows system unwinder will apply epilogue logic and
43752 produce incorrect offsets. This can be avoided by adding a nop between
43753 the last insn that can throw and the first insn of the epilogue. */
43755 static void
43756 ix86_seh_fixup_eh_fallthru (void)
43758 edge e;
43759 edge_iterator ei;
43761 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43763 rtx_insn *insn, *next;
43765 /* Find the beginning of the epilogue. */
43766 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43767 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43768 break;
43769 if (insn == NULL)
43770 continue;
43772 /* We only care about preceding insns that can throw. */
43773 insn = prev_active_insn (insn);
43774 if (insn == NULL || !can_throw_internal (insn))
43775 continue;
43777 /* Do not separate calls from their debug information. */
43778 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43779 if (NOTE_P (next)
43780 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43781 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43782 insn = next;
43783 else
43784 break;
43786 emit_insn_after (gen_nops (const1_rtx), insn);
43790 /* Implement machine specific optimizations.  We implement padding of returns
43791 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43792 static void
43793 ix86_reorg (void)
43795 /* We are freeing block_for_insn in the toplev to keep compatibility
43796 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43797 compute_bb_for_insn ();
43799 if (TARGET_SEH && current_function_has_exception_handlers ())
43800 ix86_seh_fixup_eh_fallthru ();
43802 if (optimize && optimize_function_for_speed_p (cfun))
43804 if (TARGET_PAD_SHORT_FUNCTION)
43805 ix86_pad_short_function ();
43806 else if (TARGET_PAD_RETURNS)
43807 ix86_pad_returns ();
43808 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43809 if (TARGET_FOUR_JUMP_LIMIT)
43810 ix86_avoid_jump_mispredicts ();
43811 #endif
43815 /* Return nonzero when a QImode register that must be represented via a REX
43816 prefix is used. */
43817 bool
43818 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43820 int i;
43821 extract_insn_cached (insn);
43822 for (i = 0; i < recog_data.n_operands; i++)
43823 if (GENERAL_REG_P (recog_data.operand[i])
43824 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43825 return true;
43826 return false;
43829 /* Return true when INSN mentions a register that must be encoded using a REX
43830 prefix. */
43831 bool
43832 x86_extended_reg_mentioned_p (rtx insn)
43834 subrtx_iterator::array_type array;
43835 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43837 const_rtx x = *iter;
43838 if (REG_P (x)
43839 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43840 return true;
43842 return false;
43845 /* If profitable, negate (without causing overflow) integer constant
43846 of mode MODE at location LOC. Return true in this case. */
43847 bool
43848 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43850 HOST_WIDE_INT val;
43852 if (!CONST_INT_P (*loc))
43853 return false;
43855 switch (mode)
43857 case DImode:
43858 /* DImode x86_64 constants must fit in 32 bits. */
43859 gcc_assert (x86_64_immediate_operand (*loc, mode));
43861 mode = SImode;
43862 break;
43864 case SImode:
43865 case HImode:
43866 case QImode:
43867 break;
43869 default:
43870 gcc_unreachable ();
43873 /* Avoid overflows. */
43874 if (mode_signbit_p (mode, *loc))
43875 return false;
43877 val = INTVAL (*loc);
43879 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43880 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43881 if ((val < 0 && val != -128)
43882 || val == 128)
43884 *loc = GEN_INT (-val);
43885 return true;
43888 return false;
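/* Illustrative examples (not part of the original source): a constant of
   -4 is negated so the caller can emit "subl $4, %eax" instead of
   "addl $-4, %eax"; a constant of 128 is negated to -128 because -128
   still fits in a sign-extended 8-bit immediate while +128 needs a full
   32-bit immediate; -128 itself is left untouched for the same reason.  */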
43891 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43892 optabs would emit if we didn't have TFmode patterns. */
43894 void
43895 x86_emit_floatuns (rtx operands[2])
43897 rtx_code_label *neglab, *donelab;
43898 rtx i0, i1, f0, in, out;
43899 machine_mode mode, inmode;
43901 inmode = GET_MODE (operands[1]);
43902 gcc_assert (inmode == SImode || inmode == DImode);
43904 out = operands[0];
43905 in = force_reg (inmode, operands[1]);
43906 mode = GET_MODE (out);
43907 neglab = gen_label_rtx ();
43908 donelab = gen_label_rtx ();
43909 f0 = gen_reg_rtx (mode);
43911 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43913 expand_float (out, in, 0);
43915 emit_jump_insn (gen_jump (donelab));
43916 emit_barrier ();
43918 emit_label (neglab);
43920 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43921 1, OPTAB_DIRECT);
43922 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43923 1, OPTAB_DIRECT);
43924 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43926 expand_float (f0, i0, 0);
43928 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
43930 emit_label (donelab);
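/* Illustrative C equivalent (not part of the original source; the helper
   name is hypothetical) of the sequence emitted above for a 64-bit
   unsigned input:

       double u64_to_double (unsigned long long x)
       {
         if ((long long) x >= 0)
           return (double) (long long) x;
         double f = (double) (long long) ((x >> 1) | (x & 1));
         return f + f;
       }

   The low bit is ORed back in before halving so that the rounding of the
   halved value, and hence of F + F, matches a direct conversion.  */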
43933 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43934 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43935 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43936 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43938 /* Get a vector mode of the same size as the original but with elements
43939 twice as wide. This is only guaranteed to apply to integral vectors. */
43941 static inline machine_mode
43942 get_mode_wider_vector (machine_mode o)
43944 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43945 machine_mode n = GET_MODE_WIDER_MODE (o);
43946 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43947 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43948 return n;
43951 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43952 fill target with val via vec_duplicate. */
43954 static bool
43955 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43957 bool ok;
43958 rtx_insn *insn;
43959 rtx dup;
43961 /* First attempt to recognize VAL as-is. */
43962 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43963 insn = emit_insn (gen_rtx_SET (target, dup));
43964 if (recog_memoized (insn) < 0)
43966 rtx_insn *seq;
43967 /* If that fails, force VAL into a register. */
43969 start_sequence ();
43970 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43971 seq = get_insns ();
43972 end_sequence ();
43973 if (seq)
43974 emit_insn_before (seq, insn);
43976 ok = recog_memoized (insn) >= 0;
43977 gcc_assert (ok);
43979 return true;
43982 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43983 with all elements equal to VAR. Return true if successful. */
43985 static bool
43986 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43987 rtx target, rtx val)
43989 bool ok;
43991 switch (mode)
43993 case V2SImode:
43994 case V2SFmode:
43995 if (!mmx_ok)
43996 return false;
43997 /* FALLTHRU */
43999 case V4DFmode:
44000 case V4DImode:
44001 case V8SFmode:
44002 case V8SImode:
44003 case V2DFmode:
44004 case V2DImode:
44005 case V4SFmode:
44006 case V4SImode:
44007 case V16SImode:
44008 case V8DImode:
44009 case V16SFmode:
44010 case V8DFmode:
44011 return ix86_vector_duplicate_value (mode, target, val);
44013 case V4HImode:
44014 if (!mmx_ok)
44015 return false;
44016 if (TARGET_SSE || TARGET_3DNOW_A)
44018 rtx x;
44020 val = gen_lowpart (SImode, val);
44021 x = gen_rtx_TRUNCATE (HImode, val);
44022 x = gen_rtx_VEC_DUPLICATE (mode, x);
44023 emit_insn (gen_rtx_SET (target, x));
44024 return true;
44026 goto widen;
44028 case V8QImode:
44029 if (!mmx_ok)
44030 return false;
44031 goto widen;
44033 case V8HImode:
44034 if (TARGET_AVX2)
44035 return ix86_vector_duplicate_value (mode, target, val);
44037 if (TARGET_SSE2)
44039 struct expand_vec_perm_d dperm;
44040 rtx tmp1, tmp2;
44042 permute:
44043 memset (&dperm, 0, sizeof (dperm));
44044 dperm.target = target;
44045 dperm.vmode = mode;
44046 dperm.nelt = GET_MODE_NUNITS (mode);
44047 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
44048 dperm.one_operand_p = true;
44050 /* Extend to SImode using a paradoxical SUBREG. */
44051 tmp1 = gen_reg_rtx (SImode);
44052 emit_move_insn (tmp1, gen_lowpart (SImode, val));
44054 /* Insert the SImode value as low element of a V4SImode vector. */
44055 tmp2 = gen_reg_rtx (V4SImode);
44056 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
44057 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
44059 ok = (expand_vec_perm_1 (&dperm)
44060 || expand_vec_perm_broadcast_1 (&dperm));
44061 gcc_assert (ok);
44062 return ok;
44064 goto widen;
44066 case V16QImode:
44067 if (TARGET_AVX2)
44068 return ix86_vector_duplicate_value (mode, target, val);
44070 if (TARGET_SSE2)
44071 goto permute;
44072 goto widen;
44074 widen:
44075 /* Replicate the value once into the next wider mode and recurse. */
44077 machine_mode smode, wsmode, wvmode;
44078 rtx x;
44080 smode = GET_MODE_INNER (mode);
44081 wvmode = get_mode_wider_vector (mode);
44082 wsmode = GET_MODE_INNER (wvmode);
44084 val = convert_modes (wsmode, smode, val, true);
44085 x = expand_simple_binop (wsmode, ASHIFT, val,
44086 GEN_INT (GET_MODE_BITSIZE (smode)),
44087 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44088 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
44090 x = gen_reg_rtx (wvmode);
44091 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
44092 gcc_assert (ok);
44093 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
44094 return ok;
44097 case V16HImode:
44098 case V32QImode:
44099 if (TARGET_AVX2)
44100 return ix86_vector_duplicate_value (mode, target, val);
44101 else
44103 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
44104 rtx x = gen_reg_rtx (hvmode);
44106 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44107 gcc_assert (ok);
44109 x = gen_rtx_VEC_CONCAT (mode, x, x);
44110 emit_insn (gen_rtx_SET (target, x));
44112 return true;
44114 case V64QImode:
44115 case V32HImode:
44116 if (TARGET_AVX512BW)
44117 return ix86_vector_duplicate_value (mode, target, val);
44118 else
44120 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
44121 rtx x = gen_reg_rtx (hvmode);
44123 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44124 gcc_assert (ok);
44126 x = gen_rtx_VEC_CONCAT (mode, x, x);
44127 emit_insn (gen_rtx_SET (target, x));
44129 return true;
44131 default:
44132 return false;
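/* Editorial note: a minimal sketch, not part of GCC, of the "widen"
   fallback above expressed with plain C integers: the scalar is
   replicated into the next wider integer element by shift-and-or, and
   the recursion stops once a mode with a recognized vec_duplicate (or
   the permute/concat paths) is reached.  */
#if 0
static unsigned int
replicate_byte_to_si_sketch (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;    /* QImode pair -> one HImode element */
  v |= v << 16;   /* HImode pair -> one SImode element */
  return v;       /* now usable as a broadcastable SImode element */
}
#endif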
44136 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44137 whose ONE_VAR element is VAR, and other elements are zero. Return true
44138 if successful. */
44140 static bool
44141 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
44142 rtx target, rtx var, int one_var)
44144 machine_mode vsimode;
44145 rtx new_target;
44146 rtx x, tmp;
44147 bool use_vector_set = false;
44149 switch (mode)
44151 case V2DImode:
44152 /* For SSE4.1, we normally use vector set. But if the second
44153 element is zero and inter-unit moves are OK, we use movq
44154 instead. */
44155 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
44156 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
44157 && one_var == 0));
44158 break;
44159 case V16QImode:
44160 case V4SImode:
44161 case V4SFmode:
44162 use_vector_set = TARGET_SSE4_1;
44163 break;
44164 case V8HImode:
44165 use_vector_set = TARGET_SSE2;
44166 break;
44167 case V4HImode:
44168 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
44169 break;
44170 case V32QImode:
44171 case V16HImode:
44172 case V8SImode:
44173 case V8SFmode:
44174 case V4DFmode:
44175 use_vector_set = TARGET_AVX;
44176 break;
44177 case V4DImode:
44178 /* Use ix86_expand_vector_set in 64bit mode only. */
44179 use_vector_set = TARGET_AVX && TARGET_64BIT;
44180 break;
44181 default:
44182 break;
44185 if (use_vector_set)
44187 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
44188 var = force_reg (GET_MODE_INNER (mode), var);
44189 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44190 return true;
44193 switch (mode)
44195 case V2SFmode:
44196 case V2SImode:
44197 if (!mmx_ok)
44198 return false;
44199 /* FALLTHRU */
44201 case V2DFmode:
44202 case V2DImode:
44203 if (one_var != 0)
44204 return false;
44205 var = force_reg (GET_MODE_INNER (mode), var);
44206 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44207 emit_insn (gen_rtx_SET (target, x));
44208 return true;
44210 case V4SFmode:
44211 case V4SImode:
44212 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44213 new_target = gen_reg_rtx (mode);
44214 else
44215 new_target = target;
44216 var = force_reg (GET_MODE_INNER (mode), var);
44217 x = gen_rtx_VEC_DUPLICATE (mode, var);
44218 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44219 emit_insn (gen_rtx_SET (new_target, x));
44220 if (one_var != 0)
44222 /* We need to shuffle the value to the correct position, so
44223 create a new pseudo to store the intermediate result. */
44225 /* With SSE2, we can use the integer shuffle insns. */
44226 if (mode != V4SFmode && TARGET_SSE2)
44228 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44229 const1_rtx,
44230 GEN_INT (one_var == 1 ? 0 : 1),
44231 GEN_INT (one_var == 2 ? 0 : 1),
44232 GEN_INT (one_var == 3 ? 0 : 1)));
44233 if (target != new_target)
44234 emit_move_insn (target, new_target);
44235 return true;
44238 /* Otherwise convert the intermediate result to V4SFmode and
44239 use the SSE1 shuffle instructions. */
44240 if (mode != V4SFmode)
44242 tmp = gen_reg_rtx (V4SFmode);
44243 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44245 else
44246 tmp = new_target;
44248 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44249 const1_rtx,
44250 GEN_INT (one_var == 1 ? 0 : 1),
44251 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44252 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44254 if (mode != V4SFmode)
44255 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44256 else if (tmp != target)
44257 emit_move_insn (target, tmp);
44259 else if (target != new_target)
44260 emit_move_insn (target, new_target);
44261 return true;
44263 case V8HImode:
44264 case V16QImode:
44265 vsimode = V4SImode;
44266 goto widen;
44267 case V4HImode:
44268 case V8QImode:
44269 if (!mmx_ok)
44270 return false;
44271 vsimode = V2SImode;
44272 goto widen;
44273 widen:
44274 if (one_var != 0)
44275 return false;
44277 /* Zero extend the variable element to SImode and recurse. */
44278 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44280 x = gen_reg_rtx (vsimode);
44281 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44282 var, one_var))
44283 gcc_unreachable ();
44285 emit_move_insn (target, gen_lowpart (mode, x));
44286 return true;
44288 default:
44289 return false;
44293 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44294 consisting of the values in VALS. It is known that all elements
44295 except ONE_VAR are constants. Return true if successful. */
44297 static bool
44298 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44299 rtx target, rtx vals, int one_var)
44301 rtx var = XVECEXP (vals, 0, one_var);
44302 machine_mode wmode;
44303 rtx const_vec, x;
44305 const_vec = copy_rtx (vals);
44306 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44307 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44309 switch (mode)
44311 case V2DFmode:
44312 case V2DImode:
44313 case V2SFmode:
44314 case V2SImode:
44315 /* For the two element vectors, it's just as easy to use
44316 the general case. */
44317 return false;
44319 case V4DImode:
44320 /* Use ix86_expand_vector_set in 64bit mode only. */
44321 if (!TARGET_64BIT)
44322 return false;
44323 case V4DFmode:
44324 case V8SFmode:
44325 case V8SImode:
44326 case V16HImode:
44327 case V32QImode:
44328 case V4SFmode:
44329 case V4SImode:
44330 case V8HImode:
44331 case V4HImode:
44332 break;
44334 case V16QImode:
44335 if (TARGET_SSE4_1)
44336 break;
44337 wmode = V8HImode;
44338 goto widen;
44339 case V8QImode:
44340 wmode = V4HImode;
44341 goto widen;
44342 widen:
44343 /* There's no way to set one QImode entry easily. Combine
44344 the variable value with its adjacent constant value, and
44345 promote to an HImode set. */
44346 x = XVECEXP (vals, 0, one_var ^ 1);
44347 if (one_var & 1)
44349 var = convert_modes (HImode, QImode, var, true);
44350 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44351 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44352 x = GEN_INT (INTVAL (x) & 0xff);
44354 else
44356 var = convert_modes (HImode, QImode, var, true);
44357 x = gen_int_mode (INTVAL (x) << 8, HImode);
44359 if (x != const0_rtx)
44360 var = expand_simple_binop (HImode, IOR, var, x, var,
44361 1, OPTAB_LIB_WIDEN);
44363 x = gen_reg_rtx (wmode);
44364 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44365 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44367 emit_move_insn (target, gen_lowpart (mode, x));
44368 return true;
44370 default:
44371 return false;
44374 emit_move_insn (target, const_vec);
44375 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44376 return true;
44379 /* A subroutine of ix86_expand_vector_init_general. Use vector
44380 concatenate to handle the most general case: all values variable,
44381 and none identical. */
44383 static void
44384 ix86_expand_vector_init_concat (machine_mode mode,
44385 rtx target, rtx *ops, int n)
44387 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44388 rtx first[16], second[8], third[4];
44389 rtvec v;
44390 int i, j;
44392 switch (n)
44394 case 2:
44395 switch (mode)
44397 case V16SImode:
44398 cmode = V8SImode;
44399 break;
44400 case V16SFmode:
44401 cmode = V8SFmode;
44402 break;
44403 case V8DImode:
44404 cmode = V4DImode;
44405 break;
44406 case V8DFmode:
44407 cmode = V4DFmode;
44408 break;
44409 case V8SImode:
44410 cmode = V4SImode;
44411 break;
44412 case V8SFmode:
44413 cmode = V4SFmode;
44414 break;
44415 case V4DImode:
44416 cmode = V2DImode;
44417 break;
44418 case V4DFmode:
44419 cmode = V2DFmode;
44420 break;
44421 case V4SImode:
44422 cmode = V2SImode;
44423 break;
44424 case V4SFmode:
44425 cmode = V2SFmode;
44426 break;
44427 case V2DImode:
44428 cmode = DImode;
44429 break;
44430 case V2SImode:
44431 cmode = SImode;
44432 break;
44433 case V2DFmode:
44434 cmode = DFmode;
44435 break;
44436 case V2SFmode:
44437 cmode = SFmode;
44438 break;
44439 default:
44440 gcc_unreachable ();
44443 if (!register_operand (ops[1], cmode))
44444 ops[1] = force_reg (cmode, ops[1]);
44445 if (!register_operand (ops[0], cmode))
44446 ops[0] = force_reg (cmode, ops[0]);
44447 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
44448 ops[1])));
44449 break;
44451 case 4:
44452 switch (mode)
44454 case V4DImode:
44455 cmode = V2DImode;
44456 break;
44457 case V4DFmode:
44458 cmode = V2DFmode;
44459 break;
44460 case V4SImode:
44461 cmode = V2SImode;
44462 break;
44463 case V4SFmode:
44464 cmode = V2SFmode;
44465 break;
44466 default:
44467 gcc_unreachable ();
44469 goto half;
44471 case 8:
44472 switch (mode)
44474 case V8DImode:
44475 cmode = V2DImode;
44476 hmode = V4DImode;
44477 break;
44478 case V8DFmode:
44479 cmode = V2DFmode;
44480 hmode = V4DFmode;
44481 break;
44482 case V8SImode:
44483 cmode = V2SImode;
44484 hmode = V4SImode;
44485 break;
44486 case V8SFmode:
44487 cmode = V2SFmode;
44488 hmode = V4SFmode;
44489 break;
44490 default:
44491 gcc_unreachable ();
44493 goto half;
44495 case 16:
44496 switch (mode)
44498 case V16SImode:
44499 cmode = V2SImode;
44500 hmode = V4SImode;
44501 gmode = V8SImode;
44502 break;
44503 case V16SFmode:
44504 cmode = V2SFmode;
44505 hmode = V4SFmode;
44506 gmode = V8SFmode;
44507 break;
44508 default:
44509 gcc_unreachable ();
44511 goto half;
44513 half:
44514 /* FIXME: We process inputs backward to help RA. PR 36222. */
44515 i = n - 1;
44516 j = (n >> 1) - 1;
44517 for (; i > 0; i -= 2, j--)
44519 first[j] = gen_reg_rtx (cmode);
44520 v = gen_rtvec (2, ops[i - 1], ops[i]);
44521 ix86_expand_vector_init (false, first[j],
44522 gen_rtx_PARALLEL (cmode, v));
44525 n >>= 1;
44526 if (n > 4)
44528 gcc_assert (hmode != VOIDmode);
44529 gcc_assert (gmode != VOIDmode);
44530 for (i = j = 0; i < n; i += 2, j++)
44532 second[j] = gen_reg_rtx (hmode);
44533 ix86_expand_vector_init_concat (hmode, second [j],
44534 &first [i], 2);
44536 n >>= 1;
44537 for (i = j = 0; i < n; i += 2, j++)
44539 third[j] = gen_reg_rtx (gmode);
44540 ix86_expand_vector_init_concat (gmode, third[j],
44541 &second[i], 2);
44543 n >>= 1;
44544 ix86_expand_vector_init_concat (mode, target, third, n);
44546 else if (n > 2)
44548 gcc_assert (hmode != VOIDmode);
44549 for (i = j = 0; i < n; i += 2, j++)
44551 second[j] = gen_reg_rtx (hmode);
44552 ix86_expand_vector_init_concat (hmode, second [j],
44553 &first [i], 2);
44555 n >>= 1;
44556 ix86_expand_vector_init_concat (mode, target, second, n);
44558 else
44559 ix86_expand_vector_init_concat (mode, target, first, n);
44560 break;
44562 default:
44563 gcc_unreachable ();
44567 /* A subroutine of ix86_expand_vector_init_general. Use vector
44568 interleave to handle the most general case: all values variable,
44569 and none identical. */
44571 static void
44572 ix86_expand_vector_init_interleave (machine_mode mode,
44573 rtx target, rtx *ops, int n)
44575 machine_mode first_imode, second_imode, third_imode, inner_mode;
44576 int i, j;
44577 rtx op0, op1;
44578 rtx (*gen_load_even) (rtx, rtx, rtx);
44579 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44580 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44582 switch (mode)
44584 case V8HImode:
44585 gen_load_even = gen_vec_setv8hi;
44586 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44587 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44588 inner_mode = HImode;
44589 first_imode = V4SImode;
44590 second_imode = V2DImode;
44591 third_imode = VOIDmode;
44592 break;
44593 case V16QImode:
44594 gen_load_even = gen_vec_setv16qi;
44595 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44596 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44597 inner_mode = QImode;
44598 first_imode = V8HImode;
44599 second_imode = V4SImode;
44600 third_imode = V2DImode;
44601 break;
44602 default:
44603 gcc_unreachable ();
44606 for (i = 0; i < n; i++)
44608 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44609 op0 = gen_reg_rtx (SImode);
44610 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44612 /* Insert the SImode value as low element of V4SImode vector. */
44613 op1 = gen_reg_rtx (V4SImode);
44614 op0 = gen_rtx_VEC_MERGE (V4SImode,
44615 gen_rtx_VEC_DUPLICATE (V4SImode,
44616 op0),
44617 CONST0_RTX (V4SImode),
44618 const1_rtx);
44619 emit_insn (gen_rtx_SET (op1, op0));
44621 /* Cast the V4SImode vector back to a vector in original mode. */
44622 op0 = gen_reg_rtx (mode);
44623 emit_move_insn (op0, gen_lowpart (mode, op1));
44625 /* Load even elements into the second position. */
44626 emit_insn (gen_load_even (op0,
44627 force_reg (inner_mode,
44628 ops [i + i + 1]),
44629 const1_rtx));
44631 /* Cast vector to FIRST_IMODE vector. */
44632 ops[i] = gen_reg_rtx (first_imode);
44633 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44636 /* Interleave low FIRST_IMODE vectors. */
44637 for (i = j = 0; i < n; i += 2, j++)
44639 op0 = gen_reg_rtx (first_imode);
44640 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44642 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44643 ops[j] = gen_reg_rtx (second_imode);
44644 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44647 /* Interleave low SECOND_IMODE vectors. */
44648 switch (second_imode)
44650 case V4SImode:
44651 for (i = j = 0; i < n / 2; i += 2, j++)
44653 op0 = gen_reg_rtx (second_imode);
44654 emit_insn (gen_interleave_second_low (op0, ops[i],
44655 ops[i + 1]));
44657 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44658 vector. */
44659 ops[j] = gen_reg_rtx (third_imode);
44660 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44662 second_imode = V2DImode;
44663 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44664 /* FALLTHRU */
44666 case V2DImode:
44667 op0 = gen_reg_rtx (second_imode);
44668 emit_insn (gen_interleave_second_low (op0, ops[0],
44669 ops[1]));
44671 /* Cast the SECOND_IMODE vector back to a vector in the original
44672 mode. */
44673 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
44674 break;
44676 default:
44677 gcc_unreachable ();
44681 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44682 all values variable, and none identical. */
44684 static void
44685 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44686 rtx target, rtx vals)
44688 rtx ops[64], op0, op1, op2, op3, op4, op5;
44689 machine_mode half_mode = VOIDmode;
44690 machine_mode quarter_mode = VOIDmode;
44691 int n, i;
44693 switch (mode)
44695 case V2SFmode:
44696 case V2SImode:
44697 if (!mmx_ok && !TARGET_SSE)
44698 break;
44699 /* FALLTHRU */
44701 case V16SImode:
44702 case V16SFmode:
44703 case V8DFmode:
44704 case V8DImode:
44705 case V8SFmode:
44706 case V8SImode:
44707 case V4DFmode:
44708 case V4DImode:
44709 case V4SFmode:
44710 case V4SImode:
44711 case V2DFmode:
44712 case V2DImode:
44713 n = GET_MODE_NUNITS (mode);
44714 for (i = 0; i < n; i++)
44715 ops[i] = XVECEXP (vals, 0, i);
44716 ix86_expand_vector_init_concat (mode, target, ops, n);
44717 return;
44719 case V32QImode:
44720 half_mode = V16QImode;
44721 goto half;
44723 case V16HImode:
44724 half_mode = V8HImode;
44725 goto half;
44727 half:
44728 n = GET_MODE_NUNITS (mode);
44729 for (i = 0; i < n; i++)
44730 ops[i] = XVECEXP (vals, 0, i);
44731 op0 = gen_reg_rtx (half_mode);
44732 op1 = gen_reg_rtx (half_mode);
44733 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44734 n >> 2);
44735 ix86_expand_vector_init_interleave (half_mode, op1,
44736 &ops [n >> 1], n >> 2);
44737 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
44738 return;
44740 case V64QImode:
44741 quarter_mode = V16QImode;
44742 half_mode = V32QImode;
44743 goto quarter;
44745 case V32HImode:
44746 quarter_mode = V8HImode;
44747 half_mode = V16HImode;
44748 goto quarter;
44750 quarter:
44751 n = GET_MODE_NUNITS (mode);
44752 for (i = 0; i < n; i++)
44753 ops[i] = XVECEXP (vals, 0, i);
44754 op0 = gen_reg_rtx (quarter_mode);
44755 op1 = gen_reg_rtx (quarter_mode);
44756 op2 = gen_reg_rtx (quarter_mode);
44757 op3 = gen_reg_rtx (quarter_mode);
44758 op4 = gen_reg_rtx (half_mode);
44759 op5 = gen_reg_rtx (half_mode);
44760 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44761 n >> 3);
44762 ix86_expand_vector_init_interleave (quarter_mode, op1,
44763 &ops [n >> 2], n >> 3);
44764 ix86_expand_vector_init_interleave (quarter_mode, op2,
44765 &ops [n >> 1], n >> 3);
44766 ix86_expand_vector_init_interleave (quarter_mode, op3,
44767 &ops [(n >> 1) | (n >> 2)], n >> 3);
44768 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44769 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44770 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
44771 return;
44773 case V16QImode:
44774 if (!TARGET_SSE4_1)
44775 break;
44776 /* FALLTHRU */
44778 case V8HImode:
44779 if (!TARGET_SSE2)
44780 break;
44782 /* Don't use ix86_expand_vector_init_interleave if we can't
44783 move from GPR to SSE register directly. */
44784 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44785 break;
44787 n = GET_MODE_NUNITS (mode);
44788 for (i = 0; i < n; i++)
44789 ops[i] = XVECEXP (vals, 0, i);
44790 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44791 return;
44793 case V4HImode:
44794 case V8QImode:
44795 break;
44797 default:
44798 gcc_unreachable ();
44802 int i, j, n_elts, n_words, n_elt_per_word;
44803 machine_mode inner_mode;
44804 rtx words[4], shift;
44806 inner_mode = GET_MODE_INNER (mode);
44807 n_elts = GET_MODE_NUNITS (mode);
44808 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44809 n_elt_per_word = n_elts / n_words;
44810 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44812 for (i = 0; i < n_words; ++i)
44814 rtx word = NULL_RTX;
44816 for (j = 0; j < n_elt_per_word; ++j)
44818 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44819 elt = convert_modes (word_mode, inner_mode, elt, true);
44821 if (j == 0)
44822 word = elt;
44823 else
44825 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44826 word, 1, OPTAB_LIB_WIDEN);
44827 word = expand_simple_binop (word_mode, IOR, word, elt,
44828 word, 1, OPTAB_LIB_WIDEN);
44832 words[i] = word;
44835 if (n_words == 1)
44836 emit_move_insn (target, gen_lowpart (mode, words[0]));
44837 else if (n_words == 2)
44839 rtx tmp = gen_reg_rtx (mode);
44840 emit_clobber (tmp);
44841 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44842 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44843 emit_move_insn (target, tmp);
44845 else if (n_words == 4)
44847 rtx tmp = gen_reg_rtx (V4SImode);
44848 gcc_assert (word_mode == SImode);
44849 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44850 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44851 emit_move_insn (target, gen_lowpart (mode, tmp));
44853 else
44854 gcc_unreachable ();
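/* Editorial note: an illustrative sketch, not part of GCC, of the
   word-building fallback above for a V8QImode-style vector with a
   32-bit word: each word is assembled from its elements highest-index
   first, so that element 0 lands in the low-order byte, matching the
   shift/ior loop above.  */
#if 0
static unsigned int
pack_4_bytes_sketch (const unsigned char elt[4])
{
  unsigned int word = elt[3];
  word = (word << 8) | elt[2];
  word = (word << 8) | elt[1];
  word = (word << 8) | elt[0];
  return word;
}
#endif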
44858 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44859 instructions unless MMX_OK is true. */
44861 void
44862 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44864 machine_mode mode = GET_MODE (target);
44865 machine_mode inner_mode = GET_MODE_INNER (mode);
44866 int n_elts = GET_MODE_NUNITS (mode);
44867 int n_var = 0, one_var = -1;
44868 bool all_same = true, all_const_zero = true;
44869 int i;
44870 rtx x;
44872 for (i = 0; i < n_elts; ++i)
44874 x = XVECEXP (vals, 0, i);
44875 if (!(CONST_SCALAR_INT_P (x)
44876 || CONST_DOUBLE_P (x)
44877 || CONST_FIXED_P (x)))
44878 n_var++, one_var = i;
44879 else if (x != CONST0_RTX (inner_mode))
44880 all_const_zero = false;
44881 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44882 all_same = false;
44885 /* Constants are best loaded from the constant pool. */
44886 if (n_var == 0)
44888 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44889 return;
44892 /* If all values are identical, broadcast the value. */
44893 if (all_same
44894 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44895 XVECEXP (vals, 0, 0)))
44896 return;
44898 /* Values where only one field is non-constant are best loaded from
44899 the pool and overwritten via move later. */
44900 if (n_var == 1)
44902 if (all_const_zero
44903 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44904 XVECEXP (vals, 0, one_var),
44905 one_var))
44906 return;
44908 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44909 return;
44912 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44915 void
44916 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44918 machine_mode mode = GET_MODE (target);
44919 machine_mode inner_mode = GET_MODE_INNER (mode);
44920 machine_mode half_mode;
44921 bool use_vec_merge = false;
44922 rtx tmp;
44923 static rtx (*gen_extract[6][2]) (rtx, rtx)
44925 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44926 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44927 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44928 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44929 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44930 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44932 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44934 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44935 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44936 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44937 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44938 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44939 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44941 int i, j, n;
44942 machine_mode mmode = VOIDmode;
44943 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
44945 switch (mode)
44947 case V2SFmode:
44948 case V2SImode:
44949 if (mmx_ok)
44951 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44952 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44953 if (elt == 0)
44954 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44955 else
44956 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44957 emit_insn (gen_rtx_SET (target, tmp));
44958 return;
44960 break;
44962 case V2DImode:
44963 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44964 if (use_vec_merge)
44965 break;
44967 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44968 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44969 if (elt == 0)
44970 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44971 else
44972 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44973 emit_insn (gen_rtx_SET (target, tmp));
44974 return;
44976 case V2DFmode:
44978 rtx op0, op1;
44980 /* For the two element vectors, we implement a VEC_CONCAT with
44981 the extraction of the other element. */
44983 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44984 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44986 if (elt == 0)
44987 op0 = val, op1 = tmp;
44988 else
44989 op0 = tmp, op1 = val;
44991 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44992 emit_insn (gen_rtx_SET (target, tmp));
44994 return;
44996 case V4SFmode:
44997 use_vec_merge = TARGET_SSE4_1;
44998 if (use_vec_merge)
44999 break;
45001 switch (elt)
45003 case 0:
45004 use_vec_merge = true;
45005 break;
45007 case 1:
45008 /* tmp = target = A B C D */
45009 tmp = copy_to_reg (target);
45010 /* target = A A B B */
45011 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
45012 /* target = X A B B */
45013 ix86_expand_vector_set (false, target, val, 0);
45014 /* target = A X C D */
45015 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45016 const1_rtx, const0_rtx,
45017 GEN_INT (2+4), GEN_INT (3+4)));
45018 return;
45020 case 2:
45021 /* tmp = target = A B C D */
45022 tmp = copy_to_reg (target);
45023 /* tmp = X B C D */
45024 ix86_expand_vector_set (false, tmp, val, 0);
45025 /* target = A B X D */
45026 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45027 const0_rtx, const1_rtx,
45028 GEN_INT (0+4), GEN_INT (3+4)));
45029 return;
45031 case 3:
45032 /* tmp = target = A B C D */
45033 tmp = copy_to_reg (target);
45034 /* tmp = X B C D */
45035 ix86_expand_vector_set (false, tmp, val, 0);
45036 /* target = A B C X */
45037 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45038 const0_rtx, const1_rtx,
45039 GEN_INT (2+4), GEN_INT (0+4)));
45040 return;
45042 default:
45043 gcc_unreachable ();
45045 break;
45047 case V4SImode:
45048 use_vec_merge = TARGET_SSE4_1;
45049 if (use_vec_merge)
45050 break;
45052 /* Element 0 handled by vec_merge below. */
45053 if (elt == 0)
45055 use_vec_merge = true;
45056 break;
45059 if (TARGET_SSE2)
45061 /* With SSE2, use integer shuffles to swap element 0 and ELT,
45062 store into element 0, then shuffle them back. */
45064 rtx order[4];
45066 order[0] = GEN_INT (elt);
45067 order[1] = const1_rtx;
45068 order[2] = const2_rtx;
45069 order[3] = GEN_INT (3);
45070 order[elt] = const0_rtx;
45072 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45073 order[1], order[2], order[3]));
45075 ix86_expand_vector_set (false, target, val, 0);
45077 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45078 order[1], order[2], order[3]));
45080 else
45082 /* For SSE1, we have to reuse the V4SF code. */
45083 rtx t = gen_reg_rtx (V4SFmode);
45084 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
45085 emit_move_insn (target, gen_lowpart (mode, t));
45087 return;
45089 case V8HImode:
45090 use_vec_merge = TARGET_SSE2;
45091 break;
45092 case V4HImode:
45093 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45094 break;
45096 case V16QImode:
45097 use_vec_merge = TARGET_SSE4_1;
45098 break;
45100 case V8QImode:
45101 break;
45103 case V32QImode:
45104 half_mode = V16QImode;
45105 j = 0;
45106 n = 16;
45107 goto half;
45109 case V16HImode:
45110 half_mode = V8HImode;
45111 j = 1;
45112 n = 8;
45113 goto half;
45115 case V8SImode:
45116 half_mode = V4SImode;
45117 j = 2;
45118 n = 4;
45119 goto half;
45121 case V4DImode:
45122 half_mode = V2DImode;
45123 j = 3;
45124 n = 2;
45125 goto half;
45127 case V8SFmode:
45128 half_mode = V4SFmode;
45129 j = 4;
45130 n = 4;
45131 goto half;
45133 case V4DFmode:
45134 half_mode = V2DFmode;
45135 j = 5;
45136 n = 2;
45137 goto half;
45139 half:
45140 /* Compute offset. */
45141 i = elt / n;
45142 elt %= n;
45144 gcc_assert (i <= 1);
45146 /* Extract the half. */
45147 tmp = gen_reg_rtx (half_mode);
45148 emit_insn (gen_extract[j][i] (tmp, target));
45150 /* Put val in tmp at elt. */
45151 ix86_expand_vector_set (false, tmp, val, elt);
45153 /* Put it back. */
45154 emit_insn (gen_insert[j][i] (target, target, tmp));
45155 return;
45157 case V8DFmode:
45158 if (TARGET_AVX512F)
45160 mmode = QImode;
45161 gen_blendm = gen_avx512f_blendmv8df;
45163 break;
45165 case V8DImode:
45166 if (TARGET_AVX512F)
45168 mmode = QImode;
45169 gen_blendm = gen_avx512f_blendmv8di;
45171 break;
45173 case V16SFmode:
45174 if (TARGET_AVX512F)
45176 mmode = HImode;
45177 gen_blendm = gen_avx512f_blendmv16sf;
45179 break;
45181 case V16SImode:
45182 if (TARGET_AVX512F)
45184 mmode = HImode;
45185 gen_blendm = gen_avx512f_blendmv16si;
45187 break;
45189 case V32HImode:
45190 if (TARGET_AVX512F && TARGET_AVX512BW)
45192 mmode = SImode;
45193 gen_blendm = gen_avx512bw_blendmv32hi;
45195 break;
45197 case V64QImode:
45198 if (TARGET_AVX512F && TARGET_AVX512BW)
45200 mmode = DImode;
45201 gen_blendm = gen_avx512bw_blendmv64qi;
45203 break;
45205 default:
45206 break;
45209 if (mmode != VOIDmode)
45211 tmp = gen_reg_rtx (mode);
45212 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
45213 emit_insn (gen_blendm (target, tmp, target,
45214 force_reg (mmode,
45215 gen_int_mode (1 << elt, mmode))));
45217 else if (use_vec_merge)
45219 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45220 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45221 emit_insn (gen_rtx_SET (target, tmp));
45223 else
45225 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45227 emit_move_insn (mem, target);
45229 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45230 emit_move_insn (tmp, val);
45232 emit_move_insn (target, mem);
45236 void
45237 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45239 machine_mode mode = GET_MODE (vec);
45240 machine_mode inner_mode = GET_MODE_INNER (mode);
45241 bool use_vec_extr = false;
45242 rtx tmp;
45244 switch (mode)
45246 case V2SImode:
45247 case V2SFmode:
45248 if (!mmx_ok)
45249 break;
45250 /* FALLTHRU */
45252 case V2DFmode:
45253 case V2DImode:
45254 use_vec_extr = true;
45255 break;
45257 case V4SFmode:
45258 use_vec_extr = TARGET_SSE4_1;
45259 if (use_vec_extr)
45260 break;
45262 switch (elt)
45264 case 0:
45265 tmp = vec;
45266 break;
45268 case 1:
45269 case 3:
45270 tmp = gen_reg_rtx (mode);
45271 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45272 GEN_INT (elt), GEN_INT (elt),
45273 GEN_INT (elt+4), GEN_INT (elt+4)));
45274 break;
45276 case 2:
45277 tmp = gen_reg_rtx (mode);
45278 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45279 break;
45281 default:
45282 gcc_unreachable ();
45284 vec = tmp;
45285 use_vec_extr = true;
45286 elt = 0;
45287 break;
45289 case V4SImode:
45290 use_vec_extr = TARGET_SSE4_1;
45291 if (use_vec_extr)
45292 break;
45294 if (TARGET_SSE2)
45296 switch (elt)
45298 case 0:
45299 tmp = vec;
45300 break;
45302 case 1:
45303 case 3:
45304 tmp = gen_reg_rtx (mode);
45305 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45306 GEN_INT (elt), GEN_INT (elt),
45307 GEN_INT (elt), GEN_INT (elt)));
45308 break;
45310 case 2:
45311 tmp = gen_reg_rtx (mode);
45312 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45313 break;
45315 default:
45316 gcc_unreachable ();
45318 vec = tmp;
45319 use_vec_extr = true;
45320 elt = 0;
45322 else
45324 /* For SSE1, we have to reuse the V4SF code. */
45325 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45326 gen_lowpart (V4SFmode, vec), elt);
45327 return;
45329 break;
45331 case V8HImode:
45332 use_vec_extr = TARGET_SSE2;
45333 break;
45334 case V4HImode:
45335 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45336 break;
45338 case V16QImode:
45339 use_vec_extr = TARGET_SSE4_1;
45340 break;
45342 case V8SFmode:
45343 if (TARGET_AVX)
45345 tmp = gen_reg_rtx (V4SFmode);
45346 if (elt < 4)
45347 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45348 else
45349 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45350 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45351 return;
45353 break;
45355 case V4DFmode:
45356 if (TARGET_AVX)
45358 tmp = gen_reg_rtx (V2DFmode);
45359 if (elt < 2)
45360 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45361 else
45362 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45363 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45364 return;
45366 break;
45368 case V32QImode:
45369 if (TARGET_AVX)
45371 tmp = gen_reg_rtx (V16QImode);
45372 if (elt < 16)
45373 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45374 else
45375 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45376 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45377 return;
45379 break;
45381 case V16HImode:
45382 if (TARGET_AVX)
45384 tmp = gen_reg_rtx (V8HImode);
45385 if (elt < 8)
45386 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45387 else
45388 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45389 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45390 return;
45392 break;
45394 case V8SImode:
45395 if (TARGET_AVX)
45397 tmp = gen_reg_rtx (V4SImode);
45398 if (elt < 4)
45399 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45400 else
45401 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45402 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45403 return;
45405 break;
45407 case V4DImode:
45408 if (TARGET_AVX)
45410 tmp = gen_reg_rtx (V2DImode);
45411 if (elt < 2)
45412 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45413 else
45414 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45415 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45416 return;
45418 break;
45420 case V32HImode:
45421 if (TARGET_AVX512BW)
45423 tmp = gen_reg_rtx (V16HImode);
45424 if (elt < 16)
45425 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45426 else
45427 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45428 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45429 return;
45431 break;
45433 case V64QImode:
45434 if (TARGET_AVX512BW)
45436 tmp = gen_reg_rtx (V32QImode);
45437 if (elt < 32)
45438 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45439 else
45440 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45441 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45442 return;
45444 break;
45446 case V16SFmode:
45447 tmp = gen_reg_rtx (V8SFmode);
45448 if (elt < 8)
45449 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45450 else
45451 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45452 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45453 return;
45455 case V8DFmode:
45456 tmp = gen_reg_rtx (V4DFmode);
45457 if (elt < 4)
45458 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45459 else
45460 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45461 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45462 return;
45464 case V16SImode:
45465 tmp = gen_reg_rtx (V8SImode);
45466 if (elt < 8)
45467 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45468 else
45469 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45470 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45471 return;
45473 case V8DImode:
45474 tmp = gen_reg_rtx (V4DImode);
45475 if (elt < 4)
45476 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45477 else
45478 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45479 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45480 return;
45482 case V8QImode:
45483 /* ??? Could extract the appropriate HImode element and shift. */
45484 default:
45485 break;
45488 if (use_vec_extr)
45490 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45491 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45493 /* Let the rtl optimizers know about the zero extension performed. */
45494 if (inner_mode == QImode || inner_mode == HImode)
45496 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45497 target = gen_lowpart (SImode, target);
45500 emit_insn (gen_rtx_SET (target, tmp));
45502 else
45504 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45506 emit_move_insn (mem, vec);
45508 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45509 emit_move_insn (target, tmp);
45513 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45514 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45515 The upper bits of DEST are undefined, though they shouldn't cause
45516 exceptions (some bits from src or all zeros are ok). */
45518 static void
45519 emit_reduc_half (rtx dest, rtx src, int i)
45521 rtx tem, d = dest;
45522 switch (GET_MODE (src))
45524 case V4SFmode:
45525 if (i == 128)
45526 tem = gen_sse_movhlps (dest, src, src);
45527 else
45528 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45529 GEN_INT (1 + 4), GEN_INT (1 + 4));
45530 break;
45531 case V2DFmode:
45532 tem = gen_vec_interleave_highv2df (dest, src, src);
45533 break;
45534 case V16QImode:
45535 case V8HImode:
45536 case V4SImode:
45537 case V2DImode:
45538 d = gen_reg_rtx (V1TImode);
45539 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45540 GEN_INT (i / 2));
45541 break;
45542 case V8SFmode:
45543 if (i == 256)
45544 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45545 else
45546 tem = gen_avx_shufps256 (dest, src, src,
45547 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45548 break;
45549 case V4DFmode:
45550 if (i == 256)
45551 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45552 else
45553 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45554 break;
45555 case V32QImode:
45556 case V16HImode:
45557 case V8SImode:
45558 case V4DImode:
45559 if (i == 256)
45561 if (GET_MODE (dest) != V4DImode)
45562 d = gen_reg_rtx (V4DImode);
45563 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45564 gen_lowpart (V4DImode, src),
45565 const1_rtx);
45567 else
45569 d = gen_reg_rtx (V2TImode);
45570 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45571 GEN_INT (i / 2));
45573 break;
45574 case V64QImode:
45575 case V32HImode:
45576 case V16SImode:
45577 case V16SFmode:
45578 case V8DImode:
45579 case V8DFmode:
45580 if (i > 128)
45581 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45582 gen_lowpart (V16SImode, src),
45583 gen_lowpart (V16SImode, src),
45584 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45585 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45586 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45587 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45588 GEN_INT (0xC), GEN_INT (0xD),
45589 GEN_INT (0xE), GEN_INT (0xF),
45590 GEN_INT (0x10), GEN_INT (0x11),
45591 GEN_INT (0x12), GEN_INT (0x13),
45592 GEN_INT (0x14), GEN_INT (0x15),
45593 GEN_INT (0x16), GEN_INT (0x17));
45594 else
45595 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45596 gen_lowpart (V16SImode, src),
45597 GEN_INT (i == 128 ? 0x2 : 0x1),
45598 GEN_INT (0x3),
45599 GEN_INT (0x3),
45600 GEN_INT (0x3),
45601 GEN_INT (i == 128 ? 0x6 : 0x5),
45602 GEN_INT (0x7),
45603 GEN_INT (0x7),
45604 GEN_INT (0x7),
45605 GEN_INT (i == 128 ? 0xA : 0x9),
45606 GEN_INT (0xB),
45607 GEN_INT (0xB),
45608 GEN_INT (0xB),
45609 GEN_INT (i == 128 ? 0xE : 0xD),
45610 GEN_INT (0xF),
45611 GEN_INT (0xF),
45612 GEN_INT (0xF));
45613 break;
45614 default:
45615 gcc_unreachable ();
45617 emit_insn (tem);
45618 if (d != dest)
45619 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45622 /* Expand a vector reduction. FN is the binary pattern to reduce;
45623 DEST is the destination; IN is the input vector. */
45625 void
45626 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45628 rtx half, dst, vec = in;
45629 machine_mode mode = GET_MODE (in);
45630 int i;
45632 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45633 if (TARGET_SSE4_1
45634 && mode == V8HImode
45635 && fn == gen_uminv8hi3)
45637 emit_insn (gen_sse4_1_phminposuw (dest, in));
45638 return;
45641 for (i = GET_MODE_BITSIZE (mode);
45642 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45643 i >>= 1)
45645 half = gen_reg_rtx (mode);
45646 emit_reduc_half (half, vec, i);
45647 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45648 dst = dest;
45649 else
45650 dst = gen_reg_rtx (mode);
45651 emit_insn (fn (dst, half, vec));
45652 vec = dst;
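/* Editorial note: a sketch, not part of GCC, of the halving reduction
   driven above, written for four ints with "max" as the binary
   operation; each step combines the vector with an analogue of
   emit_reduc_half (upper half rotated down), so after log2(n) steps
   lane 0 holds the full reduction.  */
#if 0
static int
reduce_max4_sketch (const int v_in[4])
{
  int v[4], half[4];
  for (int i = 0; i < 4; i++)
    v[i] = v_in[i];
  for (int n = 4; n > 1; n >>= 1)
    {
      for (int i = 0; i < 4; i++)
        half[i] = v[(i + n / 2) % 4];     /* emit_reduc_half analogue */
      for (int i = 0; i < 4; i++)
        v[i] = v[i] > half[i] ? v[i] : half[i];
    }
  return v[0];
}
#endif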
45656 /* Target hook for scalar_mode_supported_p. */
45657 static bool
45658 ix86_scalar_mode_supported_p (machine_mode mode)
45660 if (DECIMAL_FLOAT_MODE_P (mode))
45661 return default_decimal_float_supported_p ();
45662 else if (mode == TFmode)
45663 return true;
45664 else
45665 return default_scalar_mode_supported_p (mode);
45668 /* Implements target hook vector_mode_supported_p. */
45669 static bool
45670 ix86_vector_mode_supported_p (machine_mode mode)
45672 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45673 return true;
45674 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45675 return true;
45676 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45677 return true;
45678 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45679 return true;
45680 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45681 return true;
45682 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45683 return true;
45684 return false;
45687 /* Implement target hook libgcc_floating_mode_supported_p. */
45688 static bool
45689 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45691 switch (mode)
45693 case SFmode:
45694 case DFmode:
45695 case XFmode:
45696 return true;
45698 case TFmode:
45699 #ifdef IX86_NO_LIBGCC_TFMODE
45700 return false;
45701 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45702 return TARGET_LONG_DOUBLE_128;
45703 #else
45704 return true;
45705 #endif
45707 default:
45708 return false;
45712 /* Target hook for c_mode_for_suffix. */
45713 static machine_mode
45714 ix86_c_mode_for_suffix (char suffix)
45716 if (suffix == 'q')
45717 return TFmode;
45718 if (suffix == 'w')
45719 return XFmode;
45721 return VOIDmode;
45724 /* Worker function for TARGET_MD_ASM_ADJUST.
45726 We implement asm flag outputs, and maintain source compatibility
45727 with the old cc0-based compiler. */
45729 static rtx_insn *
45730 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
45731 vec<const char *> &constraints,
45732 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
45734 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
45735 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
45737 bool saw_asm_flag = false;
45739 start_sequence ();
45740 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
45742 const char *con = constraints[i];
45743 if (strncmp (con, "=@cc", 4) != 0)
45744 continue;
45745 con += 4;
45746 if (strchr (con, ',') != NULL)
45748 error ("alternatives not allowed in asm flag output");
45749 continue;
45752 bool invert = false;
45753 if (con[0] == 'n')
45754 invert = true, con++;
45756 machine_mode mode = CCmode;
45757 rtx_code code = UNKNOWN;
45759 switch (con[0])
45761 case 'a':
45762 if (con[1] == 0)
45763 mode = CCAmode, code = EQ;
45764 else if (con[1] == 'e' && con[2] == 0)
45765 mode = CCCmode, code = EQ;
45766 break;
45767 case 'b':
45768 if (con[1] == 0)
45769 mode = CCCmode, code = EQ;
45770 else if (con[1] == 'e' && con[2] == 0)
45771 mode = CCAmode, code = NE;
45772 break;
45773 case 'c':
45774 if (con[1] == 0)
45775 mode = CCCmode, code = EQ;
45776 break;
45777 case 'e':
45778 if (con[1] == 0)
45779 mode = CCZmode, code = EQ;
45780 break;
45781 case 'g':
45782 if (con[1] == 0)
45783 mode = CCGCmode, code = GT;
45784 else if (con[1] == 'e' && con[2] == 0)
45785 mode = CCGCmode, code = GE;
45786 break;
45787 case 'l':
45788 if (con[1] == 0)
45789 mode = CCGCmode, code = LT;
45790 else if (con[1] == 'e' && con[2] == 0)
45791 mode = CCGCmode, code = LE;
45792 break;
45793 case 'o':
45794 if (con[1] == 0)
45795 mode = CCOmode, code = EQ;
45796 break;
45797 case 'p':
45798 if (con[1] == 0)
45799 mode = CCPmode, code = EQ;
45800 break;
45801 case 's':
45802 if (con[1] == 0)
45803 mode = CCSmode, code = EQ;
45804 break;
45805 case 'z':
45806 if (con[1] == 0)
45807 mode = CCZmode, code = EQ;
45808 break;
45810 if (code == UNKNOWN)
45812 error ("unknown asm flag output %qs", constraints[i]);
45813 continue;
45815 if (invert)
45816 code = reverse_condition (code);
45818 rtx dest = outputs[i];
45819 if (!saw_asm_flag)
45821 /* This is the first asm flag output. Here we put the flags
45822 register in as the real output and adjust the condition to
45823 allow it. */
45824 constraints[i] = "=Bf";
45825 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
45826 saw_asm_flag = true;
45828 else
45830 /* We don't need the flags register as output twice. */
45831 constraints[i] = "=X";
45832 outputs[i] = gen_rtx_SCRATCH (SImode);
45835 rtx x = gen_rtx_REG (mode, FLAGS_REG);
45836 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
45838 machine_mode dest_mode = GET_MODE (dest);
45839 if (!SCALAR_INT_MODE_P (dest_mode))
45841 error ("invalid type for asm flag output");
45842 continue;
45844 if (dest_mode != QImode)
45846 rtx destqi = gen_reg_rtx (QImode);
45847 emit_insn (gen_rtx_SET (destqi, x));
45848 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
45850 emit_insn (gen_rtx_SET (dest, x));
45852 rtx_insn *seq = get_insns ();
45853 end_sequence ();
45855 if (saw_asm_flag)
45856 return seq;
45857 else
45859 /* If we had no asm flag outputs, clobber the flags. */
45860 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
45861 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
45862 return NULL;
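/* Editorial note: an illustrative user-level example, not part of GCC,
   of the asm flag-output extension handled above.  The "=@ccc"
   constraint asks the compiler to read the carry flag left by the asm,
   so no setcc is needed in the template; the function name and insn
   choice are hypothetical.  */
#if 0
static int
add_overflows_sketch (unsigned int a, unsigned int b, unsigned int *sum)
{
  int carry;
  asm ("addl %2, %0"
       : "=r" (*sum), "=@ccc" (carry)
       : "r" (b), "0" (a));
  return carry;
}
#endif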
45866 /* Implements target vector targetm.asm.encode_section_info. */
45868 static void ATTRIBUTE_UNUSED
45869 ix86_encode_section_info (tree decl, rtx rtl, int first)
45871 default_encode_section_info (decl, rtl, first);
45873 if (ix86_in_large_data_p (decl))
45874 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45877 /* Worker function for REVERSE_CONDITION. */
45879 enum rtx_code
45880 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45882 return (mode != CCFPmode && mode != CCFPUmode
45883 ? reverse_condition (code)
45884 : reverse_condition_maybe_unordered (code));
45887 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45888 to OPERANDS[0]. */
45890 const char *
45891 output_387_reg_move (rtx insn, rtx *operands)
45893 if (REG_P (operands[0]))
45895 if (REG_P (operands[1])
45896 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45898 if (REGNO (operands[0]) == FIRST_STACK_REG)
45899 return output_387_ffreep (operands, 0);
45900 return "fstp\t%y0";
45902 if (STACK_TOP_P (operands[0]))
45903 return "fld%Z1\t%y1";
45904 return "fst\t%y0";
45906 else if (MEM_P (operands[0]))
45908 gcc_assert (REG_P (operands[1]));
45909 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45910 return "fstp%Z0\t%y0";
45911 else
45913 /* There is no non-popping store to memory for XFmode.
45914 So if we need one, follow the store with a load. */
45915 if (GET_MODE (operands[0]) == XFmode)
45916 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45917 else
45918 return "fst%Z0\t%y0";
45921 else
45922 gcc_unreachable();
45925 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45926 FP status register is set. */
45928 void
45929 ix86_emit_fp_unordered_jump (rtx label)
45931 rtx reg = gen_reg_rtx (HImode);
45932 rtx temp;
45934 emit_insn (gen_x86_fnstsw_1 (reg));
45936 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45938 emit_insn (gen_x86_sahf_1 (reg));
45940 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45941 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45943 else
45945 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45947 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45948 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45951 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45952 gen_rtx_LABEL_REF (VOIDmode, label),
45953 pc_rtx);
45954 temp = gen_rtx_SET (pc_rtx, temp);
45956 emit_jump_insn (temp);
45957 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45960 /* Output code to perform a log1p XFmode calculation. */
45962 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45964 rtx_code_label *label1 = gen_label_rtx ();
45965 rtx_code_label *label2 = gen_label_rtx ();
45967 rtx tmp = gen_reg_rtx (XFmode);
45968 rtx tmp2 = gen_reg_rtx (XFmode);
45969 rtx test;
45971 emit_insn (gen_absxf2 (tmp, op1));
45972 test = gen_rtx_GE (VOIDmode, tmp,
45973 CONST_DOUBLE_FROM_REAL_VALUE (
45974 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45975 XFmode));
45976 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45978 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45979 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45980 emit_jump (label2);
45982 emit_label (label1);
45983 emit_move_insn (tmp, CONST1_RTX (XFmode));
45984 emit_insn (gen_addxf3 (tmp, op1, tmp));
45985 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45986 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45988 emit_label (label2);
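/* Editorial note: a sketch, not part of GCC, of the branch above, with
   hypothetical stand-ins fyl2xp1_hw()/fyl2x_hw() for the x87
   instructions.  Below the threshold 1 - sqrt(2)/2, fyl2xp1 is applied
   to x itself so a tiny argument is not lost when forming 1 + x;
   otherwise 1 + x is formed explicitly and fyl2x is used.  */
#if 0
static long double
log1p_sketch (long double x,
              long double (*fyl2xp1_hw) (long double, long double),
              long double (*fyl2x_hw) (long double, long double))
{
  const long double ln2 = 0.6931471805599453094L;    /* fldln2 */
  if ((x < 0 ? -x : x) < 0.29289321881345247561L)    /* 1 - sqrt(2)/2 */
    return fyl2xp1_hw (x, ln2);      /* ln2 * log2 (1 + x) */
  return fyl2x_hw (1.0L + x, ln2);   /* ln2 * log2 (1 + x) */
}
#endif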
45991 /* Emit code for round calculation. */
45992 void ix86_emit_i387_round (rtx op0, rtx op1)
45994 machine_mode inmode = GET_MODE (op1);
45995 machine_mode outmode = GET_MODE (op0);
45996 rtx e1, e2, res, tmp, tmp1, half;
45997 rtx scratch = gen_reg_rtx (HImode);
45998 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45999 rtx_code_label *jump_label = gen_label_rtx ();
46000 rtx insn;
46001 rtx (*gen_abs) (rtx, rtx);
46002 rtx (*gen_neg) (rtx, rtx);
46004 switch (inmode)
46006 case SFmode:
46007 gen_abs = gen_abssf2;
46008 break;
46009 case DFmode:
46010 gen_abs = gen_absdf2;
46011 break;
46012 case XFmode:
46013 gen_abs = gen_absxf2;
46014 break;
46015 default:
46016 gcc_unreachable ();
46019 switch (outmode)
46021 case SFmode:
46022 gen_neg = gen_negsf2;
46023 break;
46024 case DFmode:
46025 gen_neg = gen_negdf2;
46026 break;
46027 case XFmode:
46028 gen_neg = gen_negxf2;
46029 break;
46030 case HImode:
46031 gen_neg = gen_neghi2;
46032 break;
46033 case SImode:
46034 gen_neg = gen_negsi2;
46035 break;
46036 case DImode:
46037 gen_neg = gen_negdi2;
46038 break;
46039 default:
46040 gcc_unreachable ();
46043 e1 = gen_reg_rtx (inmode);
46044 e2 = gen_reg_rtx (inmode);
46045 res = gen_reg_rtx (outmode);
46047 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
46049 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
46051 /* scratch = fxam(op1) */
46052 emit_insn (gen_rtx_SET (scratch,
46053 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
46054 UNSPEC_FXAM)));
46055 /* e1 = fabs(op1) */
46056 emit_insn (gen_abs (e1, op1));
46058 /* e2 = e1 + 0.5 */
46059 half = force_reg (inmode, half);
46060 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
46062 /* res = floor(e2) */
46063 if (inmode != XFmode)
46065 tmp1 = gen_reg_rtx (XFmode);
46067 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
46069 else
46070 tmp1 = e2;
46072 switch (outmode)
46074 case SFmode:
46075 case DFmode:
46077 rtx tmp0 = gen_reg_rtx (XFmode);
46079 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
46081 emit_insn (gen_rtx_SET (res,
46082 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
46083 UNSPEC_TRUNC_NOOP)));
46085 break;
46086 case XFmode:
46087 emit_insn (gen_frndintxf2_floor (res, tmp1));
46088 break;
46089 case HImode:
46090 emit_insn (gen_lfloorxfhi2 (res, tmp1));
46091 break;
46092 case SImode:
46093 emit_insn (gen_lfloorxfsi2 (res, tmp1));
46094 break;
46095 case DImode:
46096 emit_insn (gen_lfloorxfdi2 (res, tmp1));
46097 break;
46098 default:
46099 gcc_unreachable ();
46102 /* flags = signbit(a) */
46103 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
46105 /* if (flags) then res = -res */
46106 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
46107 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
46108 gen_rtx_LABEL_REF (VOIDmode, jump_label),
46109 pc_rtx);
46110 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46111 predict_jump (REG_BR_PROB_BASE * 50 / 100);
46112 JUMP_LABEL (insn) = jump_label;
46114 emit_insn (gen_neg (res, res));
46116 emit_label (jump_label);
46117 LABEL_NUSES (jump_label) = 1;
46119 emit_move_insn (op0, res);
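/* Editorial note: a sketch, not part of GCC, of the identity the x87
   sequence above implements, round(a) = sgn(a) * floor(fabs(a) + 0.5),
   assuming <math.h> floor/fabs/signbit in place of frndint, fabs and
   the fxam sign test.  */
#if 0
#include <math.h>
static double
round_sketch (double a)
{
  double r = floor (fabs (a) + 0.5);
  return signbit (a) ? -r : r;        /* sign restored afterwards */
}
#endif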
46122 /* Output code to perform a Newton-Raphson approximation of a single precision
46123 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
46125 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
46127 rtx x0, x1, e0, e1;
46129 x0 = gen_reg_rtx (mode);
46130 e0 = gen_reg_rtx (mode);
46131 e1 = gen_reg_rtx (mode);
46132 x1 = gen_reg_rtx (mode);
46134 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
46136 b = force_reg (mode, b);
46138 /* x0 = rcp(b) estimate */
46139 if (mode == V16SFmode || mode == V8DFmode)
46140 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46141 UNSPEC_RCP14)));
46142 else
46143 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46144 UNSPEC_RCP)));
46146 /* e0 = x0 * b */
46147 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
46149 /* e0 = x0 * e0 */
46150 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
46152 /* e1 = x0 + x0 */
46153 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
46155 /* x1 = e1 - e0 */
46156 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
46158 /* res = a * x1 */
46159 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
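/* Editorial note: a sketch, not part of GCC, of the single
   Newton-Raphson step emitted above, with a hypothetical
   rcp_estimate() standing in for the rcpps/rcp14 hardware estimate:
   x1 = (x0 + x0) - b*x0*x0 = x0*(2 - b*x0), and a/b ~= a * x1.  */
#if 0
static float
swdiv_sketch (float a, float b, float (*rcp_estimate) (float))
{
  float x0 = rcp_estimate (b);   /* ~12-bit reciprocal estimate */
  float e0 = x0 * b;
  e0 = x0 * e0;                  /* b * x0 * x0 */
  float e1 = x0 + x0;
  float x1 = e1 - e0;            /* refined reciprocal of b */
  return a * x1;
}
#endif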
46162 /* Output code to perform a Newton-Raphson approximation of a
46163 single precision floating point [reciprocal] square root. */
46165 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
46166 bool recip)
46168 rtx x0, e0, e1, e2, e3, mthree, mhalf;
46169 REAL_VALUE_TYPE r;
46170 int unspec;
46172 x0 = gen_reg_rtx (mode);
46173 e0 = gen_reg_rtx (mode);
46174 e1 = gen_reg_rtx (mode);
46175 e2 = gen_reg_rtx (mode);
46176 e3 = gen_reg_rtx (mode);
46178 real_from_integer (&r, VOIDmode, -3, SIGNED);
46179 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46181 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
46182 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46183 unspec = UNSPEC_RSQRT;
46185 if (VECTOR_MODE_P (mode))
46187 mthree = ix86_build_const_vector (mode, true, mthree);
46188 mhalf = ix86_build_const_vector (mode, true, mhalf);
46189 /* There is no 512-bit rsqrt. There is however rsqrt14. */
46190 if (GET_MODE_SIZE (mode) == 64)
46191 unspec = UNSPEC_RSQRT14;
46194 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
46195 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
46197 a = force_reg (mode, a);
46199 /* x0 = rsqrt(a) estimate */
46200 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
46201 unspec)));
46203 /* If a == 0.0, filter out the infinite estimate to prevent a NaN for sqrt(0.0). */
46204 if (!recip)
46206 rtx zero, mask;
46208 zero = gen_reg_rtx (mode);
46209 mask = gen_reg_rtx (mode);
46211 zero = force_reg (mode, CONST0_RTX(mode));
46213 /* Handle masked compare. */
46214 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
46216 mask = gen_reg_rtx (HImode);
46217 /* Imm value 0x4 corresponds to not-equal comparison. */
46218 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
46219 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
46221 else
46223 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
46225 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
46229 /* e0 = x0 * a */
46230 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
46231 /* e1 = e0 * x0 */
46232 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
46234 /* e2 = e1 - 3. */
46235 mthree = force_reg (mode, mthree);
46236 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
46238 mhalf = force_reg (mode, mhalf);
46239 if (recip)
46240 /* e3 = -.5 * x0 */
46241 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
46242 else
46243 /* e3 = -.5 * e0 */
46244 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
46245 /* ret = e2 * e3 */
46246 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
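/* A scalar sketch of the sequence above (illustrative only; hw_rsqrt stands
   in for the UNSPEC_RSQRT/UNSPEC_RSQRT14 estimate, which the code above
   masks to 0.0 for a == 0.0 when !recip):

     static float approx_rsqrt (float a)
     {
       float x0 = hw_rsqrt (a);                        /* x0 ~= 1/sqrt(a) */
       return -0.5f * x0 * (a * x0 * x0 - 3.0f);       /* one Newton-Raphson step */
     }

     static float approx_sqrt (float a)
     {
       float x0 = hw_rsqrt (a);
       return -0.5f * (a * x0) * (a * x0 * x0 - 3.0f); /* sqrt(a) == a * rsqrt(a) */
     }
*/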
46249 #ifdef TARGET_SOLARIS
46250 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
46252 static void
46253 i386_solaris_elf_named_section (const char *name, unsigned int flags,
46254 tree decl)
46256 /* With Binutils 2.15, the "@unwind" marker must be specified on
46257 every occurrence of the ".eh_frame" section, not just the first
46258 one. */
46259 if (TARGET_64BIT
46260 && strcmp (name, ".eh_frame") == 0)
46262 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
46263 flags & SECTION_WRITE ? "aw" : "a");
46264 return;
46267 #ifndef USE_GAS
46268 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
46270 solaris_elf_asm_comdat_section (name, flags, decl);
46271 return;
46273 #endif
46275 default_elf_asm_named_section (name, flags, decl);
46277 #endif /* TARGET_SOLARIS */
46279 /* Return the mangling of TYPE if it is an extended fundamental type. */
46281 static const char *
46282 ix86_mangle_type (const_tree type)
46284 type = TYPE_MAIN_VARIANT (type);
46286 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
46287 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
46288 return NULL;
46290 switch (TYPE_MODE (type))
46292 case TFmode:
46293 /* __float128 is "g". */
46294 return "g";
46295 case XFmode:
46296 /* "long double" or __float80 is "e". */
46297 return "e";
46298 default:
46299 return NULL;
46303 /* For 32-bit code we can save PIC register setup by using
46304 __stack_chk_fail_local hidden function instead of calling
46305 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
46306 register, so it is better to call __stack_chk_fail directly. */
46308 static tree ATTRIBUTE_UNUSED
46309 ix86_stack_protect_fail (void)
46311 return TARGET_64BIT
46312 ? default_external_stack_protect_fail ()
46313 : default_hidden_stack_protect_fail ();
46316 /* Select a format to encode pointers in exception handling data. CODE
46317 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46318 true if the symbol may be affected by dynamic relocations.
46320 ??? All x86 object file formats are capable of representing this.
46321 After all, the relocation needed is the same as for the call insn.
46322 Whether or not a particular assembler allows us to enter such, I
46323 guess we'll have to see. */
46325 asm_preferred_eh_data_format (int code, int global)
46327 if (flag_pic)
46329 int type = DW_EH_PE_sdata8;
46330 if (!TARGET_64BIT
46331 || ix86_cmodel == CM_SMALL_PIC
46332 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46333 type = DW_EH_PE_sdata4;
46334 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46336 if (ix86_cmodel == CM_SMALL
46337 || (ix86_cmodel == CM_MEDIUM && code))
46338 return DW_EH_PE_udata4;
46339 return DW_EH_PE_absptr;
46342 /* Expand copysign from SIGN to the positive value ABS_VALUE
46343 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46344 the sign-bit. */
46345 static void
46346 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46348 machine_mode mode = GET_MODE (sign);
46349 rtx sgn = gen_reg_rtx (mode);
46350 if (mask == NULL_RTX)
46352 machine_mode vmode;
46354 if (mode == SFmode)
46355 vmode = V4SFmode;
46356 else if (mode == DFmode)
46357 vmode = V2DFmode;
46358 else
46359 vmode = mode;
46361 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46362 if (!VECTOR_MODE_P (mode))
46364 /* We need to generate a scalar mode mask in this case. */
46365 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46366 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46367 mask = gen_reg_rtx (mode);
46368 emit_insn (gen_rtx_SET (mask, tmp));
46371 else
46372 mask = gen_rtx_NOT (mode, mask);
46373 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
46374 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
46377 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46378 mask for masking out the sign-bit is stored in *SMASK, if that is
46379 non-null. */
46380 static rtx
46381 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46383 machine_mode vmode, mode = GET_MODE (op0);
46384 rtx xa, mask;
46386 xa = gen_reg_rtx (mode);
46387 if (mode == SFmode)
46388 vmode = V4SFmode;
46389 else if (mode == DFmode)
46390 vmode = V2DFmode;
46391 else
46392 vmode = mode;
46393 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46394 if (!VECTOR_MODE_P (mode))
46396 /* We need to generate a scalar mode mask in this case. */
46397 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46398 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46399 mask = gen_reg_rtx (mode);
46400 emit_insn (gen_rtx_SET (mask, tmp));
46402 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
46404 if (smask)
46405 *smask = mask;
46407 return xa;
46410 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46411 swapping the operands if SWAP_OPERANDS is true. The expanded
46412 code is a forward jump to a newly created label in case the
46413 comparison is true. The generated label rtx is returned. */
46414 static rtx_code_label *
46415 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46416 bool swap_operands)
46418 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46419 rtx_code_label *label;
46420 rtx tmp;
46422 if (swap_operands)
46423 std::swap (op0, op1);
46425 label = gen_label_rtx ();
46426 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46427 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46428 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46429 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46430 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46431 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46432 JUMP_LABEL (tmp) = label;
46434 return label;
46437 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46438 using comparison code CODE. Operands are swapped for the comparison if
46439 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46440 static rtx
46441 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46442 bool swap_operands)
46444 rtx (*insn)(rtx, rtx, rtx, rtx);
46445 machine_mode mode = GET_MODE (op0);
46446 rtx mask = gen_reg_rtx (mode);
46448 if (swap_operands)
46449 std::swap (op0, op1);
46451 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46453 emit_insn (insn (mask, op0, op1,
46454 gen_rtx_fmt_ee (code, mode, op0, op1)));
46455 return mask;
46458 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46459 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46460 static rtx
46461 ix86_gen_TWO52 (machine_mode mode)
46463 REAL_VALUE_TYPE TWO52r;
46464 rtx TWO52;
46466 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46467 TWO52 = const_double_from_real_value (TWO52r, mode);
46468 TWO52 = force_reg (mode, TWO52);
46470 return TWO52;
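/* Why 2**52 (2**23 for SFmode) works: for 0 <= x < 2**52 the sum x + 2**52
   has a unit in the last place of exactly 1.0, so the addition itself rounds
   x to an integer in the current rounding mode and the subtraction then
   recovers that integer exactly.  Illustrative example (assuming the default
   round-to-nearest mode):

     double two52 = 4503599627370496.0;     /* 2**52 */
     double r = (2.7 + two52) - two52;      /* r == 3.0 */

   Any value with magnitude >= 2**52 is already an integer, which is why the
   callers below branch around the whole sequence in that case. */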
46473 /* Expand SSE sequence for computing lround from OP1 storing
46474 into OP0. */
46475 void
46476 ix86_expand_lround (rtx op0, rtx op1)
46478 /* C code for the stuff we're doing below:
46479 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46480 return (long)tmp;
46482 machine_mode mode = GET_MODE (op1);
46483 const struct real_format *fmt;
46484 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46485 rtx adj;
46487 /* load nextafter (0.5, 0.0) */
46488 fmt = REAL_MODE_FORMAT (mode);
46489 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46490 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46492 /* adj = copysign (0.5, op1) */
46493 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46494 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46496 /* adj = op1 + adj */
46497 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46499 /* op0 = (imode)adj */
46500 expand_fix (op0, adj, 0);
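/* The constant loaded above is pred_half = 0.5 - 2**(-p-1), i.e.
   nextafter (0.5, 0.0), the largest value strictly below 0.5.  Adding plain
   0.5 would be wrong for inputs just below a halfway point: for the largest
   double below 0.5, x + 0.5 rounds up to 1.0 and the truncation would make
   lround return 1 instead of 0, while x + pred_half stays below 1.0.  An
   illustrative C equivalent of the expansion (a sketch, not the GCC API):

     long lround_sketch (double x)
     {
       double pred_half = nextafter (0.5, 0.0);
       return (long) (x + copysign (pred_half, x));  /* cast truncates toward zero */
     }
*/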
46503 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
46504 into OPERAND0. */
46505 void
46506 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46508 /* C code for the stuff we're doing below (for do_floor):
46509 xi = (long)op1;
46510 xi -= (double)xi > op1 ? 1 : 0;
46511 return xi;
46513 machine_mode fmode = GET_MODE (op1);
46514 machine_mode imode = GET_MODE (op0);
46515 rtx ireg, freg, tmp;
46516 rtx_code_label *label;
46518 /* reg = (long)op1 */
46519 ireg = gen_reg_rtx (imode);
46520 expand_fix (ireg, op1, 0);
46522 /* freg = (double)reg */
46523 freg = gen_reg_rtx (fmode);
46524 expand_float (freg, ireg, 0);
46526 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46527 label = ix86_expand_sse_compare_and_jump (UNLE,
46528 freg, op1, !do_floor);
46529 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46530 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46531 emit_move_insn (ireg, tmp);
46533 emit_label (label);
46534 LABEL_NUSES (label) = 1;
46536 emit_move_insn (op0, ireg);
46539 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46540 result in OPERAND0. */
46541 void
46542 ix86_expand_rint (rtx operand0, rtx operand1)
46544 /* C code for the stuff we're doing below:
46545 xa = fabs (operand1);
46546 if (!isless (xa, 2**52))
46547 return operand1;
46548 xa = xa + 2**52 - 2**52;
46549 return copysign (xa, operand1);
46551 machine_mode mode = GET_MODE (operand0);
46552 rtx res, xa, TWO52, mask;
46553 rtx_code_label *label;
46555 res = gen_reg_rtx (mode);
46556 emit_move_insn (res, operand1);
46558 /* xa = abs (operand1) */
46559 xa = ix86_expand_sse_fabs (res, &mask);
46561 /* if (!isless (xa, TWO52)) goto label; */
46562 TWO52 = ix86_gen_TWO52 (mode);
46563 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46565 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46566 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46568 ix86_sse_copysign_to_positive (res, xa, res, mask);
46570 emit_label (label);
46571 LABEL_NUSES (label) = 1;
46573 emit_move_insn (operand0, res);
46576 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46577 into OPERAND0. */
46578 void
46579 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46581 /* C code for the stuff we expand below.
46582 double xa = fabs (x), x2;
46583 if (!isless (xa, TWO52))
46584 return x;
46585 xa = xa + TWO52 - TWO52;
46586 x2 = copysign (xa, x);
46587 Compensate. Floor:
46588 if (x2 > x)
46589 x2 -= 1;
46590 Compensate. Ceil:
46591 if (x2 < x)
46592 x2 -= -1;
46593 return x2;
46595 machine_mode mode = GET_MODE (operand0);
46596 rtx xa, TWO52, tmp, one, res, mask;
46597 rtx_code_label *label;
46599 TWO52 = ix86_gen_TWO52 (mode);
46601 /* Temporary for holding the result, initialized to the input
46602 operand to ease control flow. */
46603 res = gen_reg_rtx (mode);
46604 emit_move_insn (res, operand1);
46606 /* xa = abs (operand1) */
46607 xa = ix86_expand_sse_fabs (res, &mask);
46609 /* if (!isless (xa, TWO52)) goto label; */
46610 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46612 /* xa = xa + TWO52 - TWO52; */
46613 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46614 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46616 /* xa = copysign (xa, operand1) */
46617 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46619 /* generate 1.0 or -1.0 */
46620 one = force_reg (mode,
46621 const_double_from_real_value (do_floor
46622 ? dconst1 : dconstm1, mode));
46624 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46625 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46626 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46627 /* We always need to subtract here to preserve signed zero. */
46628 tmp = expand_simple_binop (mode, MINUS,
46629 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46630 emit_move_insn (res, tmp);
46632 emit_label (label);
46633 LABEL_NUSES (label) = 1;
46635 emit_move_insn (operand0, res);
46638 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46639 into OPERAND0. */
46640 void
46641 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46643 /* C code for the stuff we expand below.
46644 double xa = fabs (x), x2;
46645 if (!isless (xa, TWO52))
46646 return x;
46647 x2 = (double)(long)x;
46648 Compensate. Floor:
46649 if (x2 > x)
46650 x2 -= 1;
46651 Compensate. Ceil:
46652 if (x2 < x)
46653 x2 += 1;
46654 if (HONOR_SIGNED_ZEROS (mode))
46655 return copysign (x2, x);
46656 return x2;
46658 machine_mode mode = GET_MODE (operand0);
46659 rtx xa, xi, TWO52, tmp, one, res, mask;
46660 rtx_code_label *label;
46662 TWO52 = ix86_gen_TWO52 (mode);
46664 /* Temporary for holding the result, initialized to the input
46665 operand to ease control flow. */
46666 res = gen_reg_rtx (mode);
46667 emit_move_insn (res, operand1);
46669 /* xa = abs (operand1) */
46670 xa = ix86_expand_sse_fabs (res, &mask);
46672 /* if (!isless (xa, TWO52)) goto label; */
46673 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46675 /* xa = (double)(long)x */
46676 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46677 expand_fix (xi, res, 0);
46678 expand_float (xa, xi, 0);
46680 /* generate 1.0 */
46681 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46683 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46684 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46685 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46686 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46687 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46688 emit_move_insn (res, tmp);
46690 if (HONOR_SIGNED_ZEROS (mode))
46691 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46693 emit_label (label);
46694 LABEL_NUSES (label) = 1;
46696 emit_move_insn (operand0, res);
46699 /* Expand SSE sequence for computing round from OPERAND1 storing
46700 into OPERAND0. This sequence works without relying on the DImode truncation
46701 via cvttsd2siq that is only available on 64-bit targets. */
46702 void
46703 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46705 /* C code for the stuff we expand below.
46706 double xa = fabs (x), xa2, x2;
46707 if (!isless (xa, TWO52))
46708 return x;
46709 Using the absolute value and copying back sign makes
46710 -0.0 -> -0.0 correct.
46711 xa2 = xa + TWO52 - TWO52;
46712 Compensate.
46713 dxa = xa2 - xa;
46714 if (dxa <= -0.5)
46715 xa2 += 1;
46716 else if (dxa > 0.5)
46717 xa2 -= 1;
46718 x2 = copysign (xa2, x);
46719 return x2;
46721 machine_mode mode = GET_MODE (operand0);
46722 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46723 rtx_code_label *label;
46725 TWO52 = ix86_gen_TWO52 (mode);
46727 /* Temporary for holding the result, initialized to the input
46728 operand to ease control flow. */
46729 res = gen_reg_rtx (mode);
46730 emit_move_insn (res, operand1);
46732 /* xa = abs (operand1) */
46733 xa = ix86_expand_sse_fabs (res, &mask);
46735 /* if (!isless (xa, TWO52)) goto label; */
46736 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46738 /* xa2 = xa + TWO52 - TWO52; */
46739 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46740 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46742 /* dxa = xa2 - xa; */
46743 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46745 /* generate 0.5, 1.0 and -0.5 */
46746 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46747 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46748 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46749 0, OPTAB_DIRECT);
46751 /* Compensate. */
46752 tmp = gen_reg_rtx (mode);
46753 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46754 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46755 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46756 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46757 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46758 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46759 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46760 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46762 /* res = copysign (xa2, operand1) */
46763 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46765 emit_label (label);
46766 LABEL_NUSES (label) = 1;
46768 emit_move_insn (operand0, res);
46771 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46772 into OPERAND0. */
46773 void
46774 ix86_expand_trunc (rtx operand0, rtx operand1)
46776 /* C code for SSE variant we expand below.
46777 double xa = fabs (x), x2;
46778 if (!isless (xa, TWO52))
46779 return x;
46780 x2 = (double)(long)x;
46781 if (HONOR_SIGNED_ZEROS (mode))
46782 return copysign (x2, x);
46783 return x2;
46785 machine_mode mode = GET_MODE (operand0);
46786 rtx xa, xi, TWO52, res, mask;
46787 rtx_code_label *label;
46789 TWO52 = ix86_gen_TWO52 (mode);
46791 /* Temporary for holding the result, initialized to the input
46792 operand to ease control flow. */
46793 res = gen_reg_rtx (mode);
46794 emit_move_insn (res, operand1);
46796 /* xa = abs (operand1) */
46797 xa = ix86_expand_sse_fabs (res, &mask);
46799 /* if (!isless (xa, TWO52)) goto label; */
46800 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46802 /* x = (double)(long)x */
46803 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46804 expand_fix (xi, res, 0);
46805 expand_float (res, xi, 0);
46807 if (HONOR_SIGNED_ZEROS (mode))
46808 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46810 emit_label (label);
46811 LABEL_NUSES (label) = 1;
46813 emit_move_insn (operand0, res);
46816 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46817 into OPERAND0. */
46818 void
46819 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46821 machine_mode mode = GET_MODE (operand0);
46822 rtx xa, mask, TWO52, one, res, smask, tmp;
46823 rtx_code_label *label;
46825 /* C code for SSE variant we expand below.
46826 double xa = fabs (x), x2;
46827 if (!isless (xa, TWO52))
46828 return x;
46829 xa2 = xa + TWO52 - TWO52;
46830 Compensate:
46831 if (xa2 > xa)
46832 xa2 -= 1.0;
46833 x2 = copysign (xa2, x);
46834 return x2;
46837 TWO52 = ix86_gen_TWO52 (mode);
46839 /* Temporary for holding the result, initialized to the input
46840 operand to ease control flow. */
46841 res = gen_reg_rtx (mode);
46842 emit_move_insn (res, operand1);
46844 /* xa = abs (operand1) */
46845 xa = ix86_expand_sse_fabs (res, &smask);
46847 /* if (!isless (xa, TWO52)) goto label; */
46848 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46850 /* res = xa + TWO52 - TWO52; */
46851 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46852 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46853 emit_move_insn (res, tmp);
46855 /* generate 1.0 */
46856 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46858 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46859 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46860 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
46861 tmp = expand_simple_binop (mode, MINUS,
46862 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46863 emit_move_insn (res, tmp);
46865 /* res = copysign (res, operand1) */
46866 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46868 emit_label (label);
46869 LABEL_NUSES (label) = 1;
46871 emit_move_insn (operand0, res);
46874 /* Expand SSE sequence for computing round from OPERAND1 storing
46875 into OPERAND0. */
46876 void
46877 ix86_expand_round (rtx operand0, rtx operand1)
46879 /* C code for the stuff we're doing below:
46880 double xa = fabs (x);
46881 if (!isless (xa, TWO52))
46882 return x;
46883 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46884 return copysign (xa, x);
46886 machine_mode mode = GET_MODE (operand0);
46887 rtx res, TWO52, xa, xi, half, mask;
46888 rtx_code_label *label;
46889 const struct real_format *fmt;
46890 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46892 /* Temporary for holding the result, initialized to the input
46893 operand to ease control flow. */
46894 res = gen_reg_rtx (mode);
46895 emit_move_insn (res, operand1);
46897 TWO52 = ix86_gen_TWO52 (mode);
46898 xa = ix86_expand_sse_fabs (res, &mask);
46899 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46901 /* load nextafter (0.5, 0.0) */
46902 fmt = REAL_MODE_FORMAT (mode);
46903 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46904 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46906 /* xa = xa + 0.5 */
46907 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46908 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46910 /* xa = (double)(int64_t)xa */
46911 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46912 expand_fix (xi, xa, 0);
46913 expand_float (xa, xi, 0);
46915 /* res = copysign (xa, operand1) */
46916 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46918 emit_label (label);
46919 LABEL_NUSES (label) = 1;
46921 emit_move_insn (operand0, res);
46924 /* Expand SSE sequence for computing round
46925 from OP1 storing into OP0 using sse4 round insn. */
46926 void
46927 ix86_expand_round_sse4 (rtx op0, rtx op1)
46929 machine_mode mode = GET_MODE (op0);
46930 rtx e1, e2, res, half;
46931 const struct real_format *fmt;
46932 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46933 rtx (*gen_copysign) (rtx, rtx, rtx);
46934 rtx (*gen_round) (rtx, rtx, rtx);
46936 switch (mode)
46938 case SFmode:
46939 gen_copysign = gen_copysignsf3;
46940 gen_round = gen_sse4_1_roundsf2;
46941 break;
46942 case DFmode:
46943 gen_copysign = gen_copysigndf3;
46944 gen_round = gen_sse4_1_rounddf2;
46945 break;
46946 default:
46947 gcc_unreachable ();
46950 /* round (a) = trunc (a + copysign (0.5, a)) */
46952 /* load nextafter (0.5, 0.0) */
46953 fmt = REAL_MODE_FORMAT (mode);
46954 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46955 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46956 half = const_double_from_real_value (pred_half, mode);
46958 /* e1 = copysign (0.5, op1) */
46959 e1 = gen_reg_rtx (mode);
46960 emit_insn (gen_copysign (e1, half, op1));
46962 /* e2 = op1 + e1 */
46963 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46965 /* res = trunc (e2) */
46966 res = gen_reg_rtx (mode);
46967 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46969 emit_move_insn (op0, res);
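/* An intrinsics-level sketch of the expansion above (illustrative only;
   _MM_FROUND_TO_ZERO corresponds to the ROUND_TRUNC immediate, and
   pred_half is the nextafter (0.5, 0.0) constant computed above):

     #include <math.h>
     #include <smmintrin.h>

     static double round_sse4_sketch (double x, double pred_half)
     {
       double t = x + copysign (pred_half, x);         /* e2 = op1 + e1 */
       __m128d v = _mm_round_sd (_mm_set_sd (t), _mm_set_sd (t),
                                 _MM_FROUND_TO_ZERO);
       return _mm_cvtsd_f64 (v);                       /* trunc (e2) */
     }
*/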
46973 /* Table of valid machine attributes. */
46974 static const struct attribute_spec ix86_attribute_table[] =
46976 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46977 affects_type_identity } */
46978 /* Stdcall attribute says callee is responsible for popping arguments
46979 if they are not variable. */
46980 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46981 true },
46982 /* Fastcall attribute says callee is responsible for popping arguments
46983 if they are not variable. */
46984 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46985 true },
46986 /* Thiscall attribute says callee is responsible for popping arguments
46987 if they are not variable. */
46988 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46989 true },
46990 /* Cdecl attribute says the callee is a normal C declaration */
46991 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46992 true },
46993 /* Regparm attribute specifies how many integer arguments are to be
46994 passed in registers. */
46995 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46996 true },
46997 /* Sseregparm attribute says we are using x86_64 calling conventions
46998 for FP arguments. */
46999 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
47000 true },
47001 /* The transactional memory builtins are implicitly regparm or fastcall
47002 depending on the ABI. Override the generic do-nothing attribute that
47003 these builtins were declared with. */
47004 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
47005 true },
47006 /* force_align_arg_pointer says this function realigns the stack at entry. */
47007 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
47008 false, true, true, ix86_handle_cconv_attribute, false },
47009 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47010 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
47011 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
47012 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
47013 false },
47014 #endif
47015 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
47016 false },
47017 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
47018 false },
47019 #ifdef SUBTARGET_ATTRIBUTE_TABLE
47020 SUBTARGET_ATTRIBUTE_TABLE,
47021 #endif
47022 /* ms_abi and sysv_abi calling convention function attributes. */
47023 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
47024 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
47025 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
47026 false },
47027 { "callee_pop_aggregate_return", 1, 1, false, true, true,
47028 ix86_handle_callee_pop_aggregate_return, true },
47029 /* End element. */
47030 { NULL, 0, 0, false, false, false, NULL, false }
47033 /* Implement targetm.vectorize.builtin_vectorization_cost. */
47034 static int
47035 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
47036 tree vectype, int)
47038 unsigned elements;
47040 switch (type_of_cost)
47042 case scalar_stmt:
47043 return ix86_cost->scalar_stmt_cost;
47045 case scalar_load:
47046 return ix86_cost->scalar_load_cost;
47048 case scalar_store:
47049 return ix86_cost->scalar_store_cost;
47051 case vector_stmt:
47052 return ix86_cost->vec_stmt_cost;
47054 case vector_load:
47055 return ix86_cost->vec_align_load_cost;
47057 case vector_store:
47058 return ix86_cost->vec_store_cost;
47060 case vec_to_scalar:
47061 return ix86_cost->vec_to_scalar_cost;
47063 case scalar_to_vec:
47064 return ix86_cost->scalar_to_vec_cost;
47066 case unaligned_load:
47067 case unaligned_store:
47068 return ix86_cost->vec_unalign_load_cost;
47070 case cond_branch_taken:
47071 return ix86_cost->cond_taken_branch_cost;
47073 case cond_branch_not_taken:
47074 return ix86_cost->cond_not_taken_branch_cost;
47076 case vec_perm:
47077 case vec_promote_demote:
47078 return ix86_cost->vec_stmt_cost;
47080 case vec_construct:
47081 elements = TYPE_VECTOR_SUBPARTS (vectype);
47082 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
47084 default:
47085 gcc_unreachable ();
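/* Example of the vec_construct case above: building a V8SF vector from
   eight scalars (TYPE_VECTOR_SUBPARTS == 8) is charged 8/2 + 1 = 5 times
   vec_stmt_cost, a rough model of the insert/merge sequence needed to
   assemble the vector from its elements. */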
47089 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
47090 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
47091 insn every time. */
47093 static GTY(()) rtx_insn *vselect_insn;
47095 /* Initialize vselect_insn. */
47097 static void
47098 init_vselect_insn (void)
47100 unsigned i;
47101 rtx x;
47103 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
47104 for (i = 0; i < MAX_VECT_LEN; ++i)
47105 XVECEXP (x, 0, i) = const0_rtx;
47106 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
47107 const0_rtx), x);
47108 x = gen_rtx_SET (const0_rtx, x);
47109 start_sequence ();
47110 vselect_insn = emit_insn (x);
47111 end_sequence ();
47114 /* Construct (set target (vec_select op0 (parallel perm))) and
47115 return true if that's a valid instruction in the active ISA. */
47117 static bool
47118 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
47119 unsigned nelt, bool testing_p)
47121 unsigned int i;
47122 rtx x, save_vconcat;
47123 int icode;
47125 if (vselect_insn == NULL_RTX)
47126 init_vselect_insn ();
47128 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
47129 PUT_NUM_ELEM (XVEC (x, 0), nelt);
47130 for (i = 0; i < nelt; ++i)
47131 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
47132 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47133 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
47134 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
47135 SET_DEST (PATTERN (vselect_insn)) = target;
47136 icode = recog_memoized (vselect_insn);
47138 if (icode >= 0 && !testing_p)
47139 emit_insn (copy_rtx (PATTERN (vselect_insn)));
47141 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
47142 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
47143 INSN_CODE (vselect_insn) = -1;
47145 return icode >= 0;
47148 /* Similar, but generate a vec_concat from op0 and op1 as well. */
47150 static bool
47151 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
47152 const unsigned char *perm, unsigned nelt,
47153 bool testing_p)
47155 machine_mode v2mode;
47156 rtx x;
47157 bool ok;
47159 if (vselect_insn == NULL_RTX)
47160 init_vselect_insn ();
47162 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
47163 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47164 PUT_MODE (x, v2mode);
47165 XEXP (x, 0) = op0;
47166 XEXP (x, 1) = op1;
47167 ok = expand_vselect (target, x, perm, nelt, testing_p);
47168 XEXP (x, 0) = const0_rtx;
47169 XEXP (x, 1) = const0_rtx;
47170 return ok;
47173 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47174 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
47176 static bool
47177 expand_vec_perm_blend (struct expand_vec_perm_d *d)
47179 machine_mode mmode, vmode = d->vmode;
47180 unsigned i, mask, nelt = d->nelt;
47181 rtx target, op0, op1, maskop, x;
47182 rtx rperm[32], vperm;
47184 if (d->one_operand_p)
47185 return false;
47186 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
47187 && (TARGET_AVX512BW
47188 || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
47190 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47192 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47194 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47196 else
47197 return false;
47199 /* This is a blend, not a permute. Elements must stay in their
47200 respective lanes. */
47201 for (i = 0; i < nelt; ++i)
47203 unsigned e = d->perm[i];
47204 if (!(e == i || e == i + nelt))
47205 return false;
47208 if (d->testing_p)
47209 return true;
47211 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
47212 decision should be extracted elsewhere, so that we only try that
47213 sequence once all budget==3 options have been tried. */
47214 target = d->target;
47215 op0 = d->op0;
47216 op1 = d->op1;
47217 mask = 0;
47219 switch (vmode)
47221 case V8DFmode:
47222 case V16SFmode:
47223 case V4DFmode:
47224 case V8SFmode:
47225 case V2DFmode:
47226 case V4SFmode:
47227 case V8HImode:
47228 case V8SImode:
47229 case V32HImode:
47230 case V64QImode:
47231 case V16SImode:
47232 case V8DImode:
47233 for (i = 0; i < nelt; ++i)
47234 mask |= (d->perm[i] >= nelt) << i;
47235 break;
47237 case V2DImode:
47238 for (i = 0; i < 2; ++i)
47239 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
47240 vmode = V8HImode;
47241 goto do_subreg;
47243 case V4SImode:
47244 for (i = 0; i < 4; ++i)
47245 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47246 vmode = V8HImode;
47247 goto do_subreg;
47249 case V16QImode:
47250 /* See if bytes move in pairs so we can use pblendw with
47251 an immediate argument, rather than pblendvb with a vector
47252 argument. */
47253 for (i = 0; i < 16; i += 2)
47254 if (d->perm[i] + 1 != d->perm[i + 1])
47256 use_pblendvb:
47257 for (i = 0; i < nelt; ++i)
47258 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
47260 finish_pblendvb:
47261 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
47262 vperm = force_reg (vmode, vperm);
47264 if (GET_MODE_SIZE (vmode) == 16)
47265 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
47266 else
47267 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
47268 if (target != d->target)
47269 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47270 return true;
47273 for (i = 0; i < 8; ++i)
47274 mask |= (d->perm[i * 2] >= 16) << i;
47275 vmode = V8HImode;
47276 /* FALLTHRU */
47278 do_subreg:
47279 target = gen_reg_rtx (vmode);
47280 op0 = gen_lowpart (vmode, op0);
47281 op1 = gen_lowpart (vmode, op1);
47282 break;
47284 case V32QImode:
47285 /* See if bytes move in pairs. If not, vpblendvb must be used. */
47286 for (i = 0; i < 32; i += 2)
47287 if (d->perm[i] + 1 != d->perm[i + 1])
47288 goto use_pblendvb;
47289 /* See if bytes move in quadruplets. If yes, vpblendd
47290 with immediate can be used. */
47291 for (i = 0; i < 32; i += 4)
47292 if (d->perm[i] + 2 != d->perm[i + 2])
47293 break;
47294 if (i < 32)
47296 /* See if bytes move the same in both lanes. If yes,
47297 vpblendw with immediate can be used. */
47298 for (i = 0; i < 16; i += 2)
47299 if (d->perm[i] + 16 != d->perm[i + 16])
47300 goto use_pblendvb;
47302 /* Use vpblendw. */
47303 for (i = 0; i < 16; ++i)
47304 mask |= (d->perm[i * 2] >= 32) << i;
47305 vmode = V16HImode;
47306 goto do_subreg;
47309 /* Use vpblendd. */
47310 for (i = 0; i < 8; ++i)
47311 mask |= (d->perm[i * 4] >= 32) << i;
47312 vmode = V8SImode;
47313 goto do_subreg;
47315 case V16HImode:
47316 /* See if words move in pairs. If yes, vpblendd can be used. */
47317 for (i = 0; i < 16; i += 2)
47318 if (d->perm[i] + 1 != d->perm[i + 1])
47319 break;
47320 if (i < 16)
47322 /* See if words move the same in both lanes. If not,
47323 vpblendvb must be used. */
47324 for (i = 0; i < 8; i++)
47325 if (d->perm[i] + 8 != d->perm[i + 8])
47327 /* Use vpblendvb. */
47328 for (i = 0; i < 32; ++i)
47329 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47331 vmode = V32QImode;
47332 nelt = 32;
47333 target = gen_reg_rtx (vmode);
47334 op0 = gen_lowpart (vmode, op0);
47335 op1 = gen_lowpart (vmode, op1);
47336 goto finish_pblendvb;
47339 /* Use vpblendw. */
47340 for (i = 0; i < 16; ++i)
47341 mask |= (d->perm[i] >= 16) << i;
47342 break;
47345 /* Use vpblendd. */
47346 for (i = 0; i < 8; ++i)
47347 mask |= (d->perm[i * 2] >= 16) << i;
47348 vmode = V8SImode;
47349 goto do_subreg;
47351 case V4DImode:
47352 /* Use vpblendd. */
47353 for (i = 0; i < 4; ++i)
47354 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47355 vmode = V8SImode;
47356 goto do_subreg;
47358 default:
47359 gcc_unreachable ();
47362 switch (vmode)
47364 case V8DFmode:
47365 case V8DImode:
47366 mmode = QImode;
47367 break;
47368 case V16SFmode:
47369 case V16SImode:
47370 mmode = HImode;
47371 break;
47372 case V32HImode:
47373 mmode = SImode;
47374 break;
47375 case V64QImode:
47376 mmode = DImode;
47377 break;
47378 default:
47379 mmode = VOIDmode;
47382 if (mmode != VOIDmode)
47383 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47384 else
47385 maskop = GEN_INT (mask);
47387 /* This matches five different patterns with the different modes. */
47388 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47389 x = gen_rtx_SET (target, x);
47390 emit_insn (x);
47391 if (target != d->target)
47392 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47394 return true;
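/* Example of the mask construction above: a V4SFmode blend with
   perm = {0, 5, 2, 7} (elements 1 and 3 taken from op1) yields
   mask = 0b1010, the blendps immediate.  The same selection in V4SImode
   instead goes through the do_subreg path: every doubleword taken from op1
   sets two adjacent bits of a V8HImode pblendw immediate, giving
   0b11001100.  (Illustrative permutation, not taken from the code above.) */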
47397 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47398 in terms of the variable form of vpermilps.
47400 Note that we will have already failed the immediate input vpermilps,
47401 which requires that the high and low part shuffle be identical; the
47402 variable form doesn't require that. */
47404 static bool
47405 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47407 rtx rperm[8], vperm;
47408 unsigned i;
47410 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47411 return false;
47413 /* We can only permute within the 128-bit lane. */
47414 for (i = 0; i < 8; ++i)
47416 unsigned e = d->perm[i];
47417 if (i < 4 ? e >= 4 : e < 4)
47418 return false;
47421 if (d->testing_p)
47422 return true;
47424 for (i = 0; i < 8; ++i)
47426 unsigned e = d->perm[i];
47428 /* Within each 128-bit lane, the elements of op0 are numbered
47429 from 0 and the elements of op1 are numbered from 4. */
47430 if (e >= 8 + 4)
47431 e -= 8;
47432 else if (e >= 4)
47433 e -= 4;
47435 rperm[i] = GEN_INT (e);
47438 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47439 vperm = force_reg (V8SImode, vperm);
47440 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47442 return true;
47445 /* Return true if permutation D can be performed as VMODE permutation
47446 instead. */
47448 static bool
47449 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47451 unsigned int i, j, chunk;
47453 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47454 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47455 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47456 return false;
47458 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47459 return true;
47461 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47462 for (i = 0; i < d->nelt; i += chunk)
47463 if (d->perm[i] & (chunk - 1))
47464 return false;
47465 else
47466 for (j = 1; j < chunk; ++j)
47467 if (d->perm[i] + j != d->perm[i + j])
47468 return false;
47470 return true;
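/* Example for the check above: a V16QImode permutation can be performed in
   V4SImode only when the byte indices move in aligned groups of
   chunk == 16/4 == 4.  The byte permutation {4 5 6 7  0 1 2 3  12 13 14 15
   8 9 10 11} passes (it is the V4SImode permutation {1 0 3 2}), whereas one
   starting {1 2 3 4 ...} fails because d->perm[0] & 3 is nonzero.
   (Illustrative values, not taken from the code above.) */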
47473 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47474 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47476 static bool
47477 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47479 unsigned i, nelt, eltsz, mask;
47480 unsigned char perm[64];
47481 machine_mode vmode = V16QImode;
47482 rtx rperm[64], vperm, target, op0, op1;
47484 nelt = d->nelt;
47486 if (!d->one_operand_p)
47488 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47490 if (TARGET_AVX2
47491 && valid_perm_using_mode_p (V2TImode, d))
47493 if (d->testing_p)
47494 return true;
47496 /* Use vperm2i128 insn. The pattern uses
47497 V4DImode instead of V2TImode. */
47498 target = d->target;
47499 if (d->vmode != V4DImode)
47500 target = gen_reg_rtx (V4DImode);
47501 op0 = gen_lowpart (V4DImode, d->op0);
47502 op1 = gen_lowpart (V4DImode, d->op1);
47503 rperm[0]
47504 = GEN_INT ((d->perm[0] / (nelt / 2))
47505 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47506 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47507 if (target != d->target)
47508 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47509 return true;
47511 return false;
47514 else
47516 if (GET_MODE_SIZE (d->vmode) == 16)
47518 if (!TARGET_SSSE3)
47519 return false;
47521 else if (GET_MODE_SIZE (d->vmode) == 32)
47523 if (!TARGET_AVX2)
47524 return false;
47526 /* V4DImode should be already handled through
47527 expand_vselect by vpermq instruction. */
47528 gcc_assert (d->vmode != V4DImode);
47530 vmode = V32QImode;
47531 if (d->vmode == V8SImode
47532 || d->vmode == V16HImode
47533 || d->vmode == V32QImode)
47535 /* First see if vpermq can be used for
47536 V8SImode/V16HImode/V32QImode. */
47537 if (valid_perm_using_mode_p (V4DImode, d))
47539 for (i = 0; i < 4; i++)
47540 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47541 if (d->testing_p)
47542 return true;
47543 target = gen_reg_rtx (V4DImode);
47544 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47545 perm, 4, false))
47547 emit_move_insn (d->target,
47548 gen_lowpart (d->vmode, target));
47549 return true;
47551 return false;
47554 /* Next see if vpermd can be used. */
47555 if (valid_perm_using_mode_p (V8SImode, d))
47556 vmode = V8SImode;
47558 /* Or if vpermps can be used. */
47559 else if (d->vmode == V8SFmode)
47560 vmode = V8SImode;
47562 if (vmode == V32QImode)
47564 /* vpshufb only works intra-lane; it is not
47565 possible to shuffle bytes between the lanes. */
47566 for (i = 0; i < nelt; ++i)
47567 if ((d->perm[i] ^ i) & (nelt / 2))
47568 return false;
47571 else if (GET_MODE_SIZE (d->vmode) == 64)
47573 if (!TARGET_AVX512BW)
47574 return false;
47576 /* If vpermq didn't work, vpshufb won't work either. */
47577 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47578 return false;
47580 vmode = V64QImode;
47581 if (d->vmode == V16SImode
47582 || d->vmode == V32HImode
47583 || d->vmode == V64QImode)
47585 /* First see if vpermq can be used for
47586 V16SImode/V32HImode/V64QImode. */
47587 if (valid_perm_using_mode_p (V8DImode, d))
47589 for (i = 0; i < 8; i++)
47590 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47591 if (d->testing_p)
47592 return true;
47593 target = gen_reg_rtx (V8DImode);
47594 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47595 perm, 8, false))
47597 emit_move_insn (d->target,
47598 gen_lowpart (d->vmode, target));
47599 return true;
47601 return false;
47604 /* Next see if vpermd can be used. */
47605 if (valid_perm_using_mode_p (V16SImode, d))
47606 vmode = V16SImode;
47608 /* Or if vpermps can be used. */
47609 else if (d->vmode == V16SFmode)
47610 vmode = V16SImode;
47611 if (vmode == V64QImode)
47613 /* vpshufb only works intra-lane; it is not
47614 possible to shuffle bytes between the lanes. */
47615 for (i = 0; i < nelt; ++i)
47616 if ((d->perm[i] ^ i) & (nelt / 4))
47617 return false;
47620 else
47621 return false;
47624 if (d->testing_p)
47625 return true;
47627 if (vmode == V8SImode)
47628 for (i = 0; i < 8; ++i)
47629 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47630 else if (vmode == V16SImode)
47631 for (i = 0; i < 16; ++i)
47632 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47633 else
47635 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47636 if (!d->one_operand_p)
47637 mask = 2 * nelt - 1;
47638 else if (vmode == V16QImode)
47639 mask = nelt - 1;
47640 else if (vmode == V64QImode)
47641 mask = nelt / 4 - 1;
47642 else
47643 mask = nelt / 2 - 1;
47645 for (i = 0; i < nelt; ++i)
47647 unsigned j, e = d->perm[i] & mask;
47648 for (j = 0; j < eltsz; ++j)
47649 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47653 vperm = gen_rtx_CONST_VECTOR (vmode,
47654 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47655 vperm = force_reg (vmode, vperm);
47657 target = d->target;
47658 if (d->vmode != vmode)
47659 target = gen_reg_rtx (vmode);
47660 op0 = gen_lowpart (vmode, d->op0);
47661 if (d->one_operand_p)
47663 if (vmode == V16QImode)
47664 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47665 else if (vmode == V32QImode)
47666 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47667 else if (vmode == V64QImode)
47668 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47669 else if (vmode == V8SFmode)
47670 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47671 else if (vmode == V8SImode)
47672 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47673 else if (vmode == V16SFmode)
47674 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47675 else if (vmode == V16SImode)
47676 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47677 else
47678 gcc_unreachable ();
47680 else
47682 op1 = gen_lowpart (vmode, d->op1);
47683 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47685 if (target != d->target)
47686 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47688 return true;
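/* Example of the byte-expansion loop above: for a one-operand V8HImode
   permutation, eltsz == 2 and mask == nelt - 1, so a word index e at
   position i becomes the pshufb control byte pair {2*e, 2*e + 1} at bytes
   2*i and 2*i + 1; d->perm[0] == 3 therefore yields control bytes 6 and 7
   for the first word.  (Illustrative values, not taken from the code above.) */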
47691 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47692 in a single instruction. */
47694 static bool
47695 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47697 unsigned i, nelt = d->nelt;
47698 unsigned char perm2[MAX_VECT_LEN];
47700 /* Check plain VEC_SELECT first, because AVX has instructions that could
47701 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47702 input where SEL+CONCAT may not. */
47703 if (d->one_operand_p)
47705 int mask = nelt - 1;
47706 bool identity_perm = true;
47707 bool broadcast_perm = true;
47709 for (i = 0; i < nelt; i++)
47711 perm2[i] = d->perm[i] & mask;
47712 if (perm2[i] != i)
47713 identity_perm = false;
47714 if (perm2[i])
47715 broadcast_perm = false;
47718 if (identity_perm)
47720 if (!d->testing_p)
47721 emit_move_insn (d->target, d->op0);
47722 return true;
47724 else if (broadcast_perm && TARGET_AVX2)
47726 /* Use vpbroadcast{b,w,d}. */
47727 rtx (*gen) (rtx, rtx) = NULL;
47728 switch (d->vmode)
47730 case V64QImode:
47731 if (TARGET_AVX512BW)
47732 gen = gen_avx512bw_vec_dupv64qi_1;
47733 break;
47734 case V32QImode:
47735 gen = gen_avx2_pbroadcastv32qi_1;
47736 break;
47737 case V32HImode:
47738 if (TARGET_AVX512BW)
47739 gen = gen_avx512bw_vec_dupv32hi_1;
47740 break;
47741 case V16HImode:
47742 gen = gen_avx2_pbroadcastv16hi_1;
47743 break;
47744 case V16SImode:
47745 if (TARGET_AVX512F)
47746 gen = gen_avx512f_vec_dupv16si_1;
47747 break;
47748 case V8SImode:
47749 gen = gen_avx2_pbroadcastv8si_1;
47750 break;
47751 case V16QImode:
47752 gen = gen_avx2_pbroadcastv16qi;
47753 break;
47754 case V8HImode:
47755 gen = gen_avx2_pbroadcastv8hi;
47756 break;
47757 case V16SFmode:
47758 if (TARGET_AVX512F)
47759 gen = gen_avx512f_vec_dupv16sf_1;
47760 break;
47761 case V8SFmode:
47762 gen = gen_avx2_vec_dupv8sf_1;
47763 break;
47764 case V8DFmode:
47765 if (TARGET_AVX512F)
47766 gen = gen_avx512f_vec_dupv8df_1;
47767 break;
47768 case V8DImode:
47769 if (TARGET_AVX512F)
47770 gen = gen_avx512f_vec_dupv8di_1;
47771 break;
47772 /* For other modes, prefer the other shuffles this function creates. */
47773 default: break;
47775 if (gen != NULL)
47777 if (!d->testing_p)
47778 emit_insn (gen (d->target, d->op0));
47779 return true;
47783 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47784 return true;
47786 /* There are plenty of patterns in sse.md that are written for
47787 SEL+CONCAT and are not replicated for a single op. Perhaps
47788 that should be changed, to avoid the nastiness here. */
47790 /* Recognize interleave style patterns, which means incrementing
47791 every other permutation operand. */
47792 for (i = 0; i < nelt; i += 2)
47794 perm2[i] = d->perm[i] & mask;
47795 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47797 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47798 d->testing_p))
47799 return true;
47801 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47802 if (nelt >= 4)
47804 for (i = 0; i < nelt; i += 4)
47806 perm2[i + 0] = d->perm[i + 0] & mask;
47807 perm2[i + 1] = d->perm[i + 1] & mask;
47808 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47809 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47812 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47813 d->testing_p))
47814 return true;
47818 /* Finally, try the fully general two operand permute. */
47819 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47820 d->testing_p))
47821 return true;
47823 /* Recognize interleave style patterns with reversed operands. */
47824 if (!d->one_operand_p)
47826 for (i = 0; i < nelt; ++i)
47828 unsigned e = d->perm[i];
47829 if (e >= nelt)
47830 e -= nelt;
47831 else
47832 e += nelt;
47833 perm2[i] = e;
47836 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47837 d->testing_p))
47838 return true;
47841 /* Try the SSE4.1 blend variable merge instructions. */
47842 if (expand_vec_perm_blend (d))
47843 return true;
47845 /* Try one of the AVX vpermil variable permutations. */
47846 if (expand_vec_perm_vpermil (d))
47847 return true;
47849 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47850 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47851 if (expand_vec_perm_pshufb (d))
47852 return true;
47854 /* Try the AVX2 vpalignr instruction. */
47855 if (expand_vec_perm_palignr (d, true))
47856 return true;
47858 /* Try the AVX512F vpermi2 instructions. */
47859 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47860 return true;
47862 return false;
47865 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47866 in terms of a pair of pshuflw + pshufhw instructions. */
47868 static bool
47869 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47871 unsigned char perm2[MAX_VECT_LEN];
47872 unsigned i;
47873 bool ok;
47875 if (d->vmode != V8HImode || !d->one_operand_p)
47876 return false;
47878 /* The two permutations only operate in 64-bit lanes. */
47879 for (i = 0; i < 4; ++i)
47880 if (d->perm[i] >= 4)
47881 return false;
47882 for (i = 4; i < 8; ++i)
47883 if (d->perm[i] < 4)
47884 return false;
47886 if (d->testing_p)
47887 return true;
47889 /* Emit the pshuflw. */
47890 memcpy (perm2, d->perm, 4);
47891 for (i = 4; i < 8; ++i)
47892 perm2[i] = i;
47893 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47894 gcc_assert (ok);
47896 /* Emit the pshufhw. */
47897 memcpy (perm2 + 4, d->perm + 4, 4);
47898 for (i = 0; i < 4; ++i)
47899 perm2[i] = i;
47900 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47901 gcc_assert (ok);
47903 return true;
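/* Example for the expansion above: the V8HImode permutation
   {2 0 3 1  6 4 7 5} keeps the low four words within the low half and the
   high four within the high half, so it is emitted as pshuflw selecting
   {2 0 3 1 | 4 5 6 7} followed by pshufhw selecting {0 1 2 3 | 6 4 7 5}.
   (Illustrative permutation, not taken from the code above.) */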
47906 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47907 the permutation using the SSSE3 palignr instruction. This succeeds
47908 when all of the elements in PERM fit within one vector and we merely
47909 need to shift them down so that a single vector permutation has a
47910 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47911 the vpalignr instruction itself can perform the requested permutation. */
47913 static bool
47914 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47916 unsigned i, nelt = d->nelt;
47917 unsigned min, max, minswap, maxswap;
47918 bool in_order, ok, swap = false;
47919 rtx shift, target;
47920 struct expand_vec_perm_d dcopy;
47922 /* Even with AVX, palignr only operates on 128-bit vectors;
47923 with AVX2, palignr operates on both 128-bit lanes. */
47924 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47925 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47926 return false;
47928 min = 2 * nelt;
47929 max = 0;
47930 minswap = 2 * nelt;
47931 maxswap = 0;
47932 for (i = 0; i < nelt; ++i)
47934 unsigned e = d->perm[i];
47935 unsigned eswap = d->perm[i] ^ nelt;
47936 if (GET_MODE_SIZE (d->vmode) == 32)
47938 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47939 eswap = e ^ (nelt / 2);
47941 if (e < min)
47942 min = e;
47943 if (e > max)
47944 max = e;
47945 if (eswap < minswap)
47946 minswap = eswap;
47947 if (eswap > maxswap)
47948 maxswap = eswap;
47950 if (min == 0
47951 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47953 if (d->one_operand_p
47954 || minswap == 0
47955 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47956 ? nelt / 2 : nelt))
47957 return false;
47958 swap = true;
47959 min = minswap;
47960 max = maxswap;
47963 /* Given that we have SSSE3, we know we'll be able to implement the
47964 single operand permutation after the palignr with pshufb for
47965 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47966 first. */
47967 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47968 return true;
47970 dcopy = *d;
47971 if (swap)
47973 dcopy.op0 = d->op1;
47974 dcopy.op1 = d->op0;
47975 for (i = 0; i < nelt; ++i)
47976 dcopy.perm[i] ^= nelt;
47979 in_order = true;
47980 for (i = 0; i < nelt; ++i)
47982 unsigned e = dcopy.perm[i];
47983 if (GET_MODE_SIZE (d->vmode) == 32
47984 && e >= nelt
47985 && (e & (nelt / 2 - 1)) < min)
47986 e = e - min - (nelt / 2);
47987 else
47988 e = e - min;
47989 if (e != i)
47990 in_order = false;
47991 dcopy.perm[i] = e;
47993 dcopy.one_operand_p = true;
47995 if (single_insn_only_p && !in_order)
47996 return false;
47998 /* For AVX2, test whether we can permute the result in one instruction. */
47999 if (d->testing_p)
48001 if (in_order)
48002 return true;
48003 dcopy.op1 = dcopy.op0;
48004 return expand_vec_perm_1 (&dcopy);
48007 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
48008 if (GET_MODE_SIZE (d->vmode) == 16)
48010 target = gen_reg_rtx (TImode);
48011 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
48012 gen_lowpart (TImode, dcopy.op0), shift));
48014 else
48016 target = gen_reg_rtx (V2TImode);
48017 emit_insn (gen_avx2_palignrv2ti (target,
48018 gen_lowpart (V2TImode, dcopy.op1),
48019 gen_lowpart (V2TImode, dcopy.op0),
48020 shift));
48023 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
48025 /* Test for the degenerate case where the alignment by itself
48026 produces the desired permutation. */
48027 if (in_order)
48029 emit_move_insn (d->target, dcopy.op0);
48030 return true;
48033 ok = expand_vec_perm_1 (&dcopy);
48034 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
48036 return ok;
48039 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48040 the permutation using the SSE4_1 pblendv instruction. Potentially
48041 reduces the permutation from 2 pshufbs and an or to 1 pshufb and a pblendv. */
48043 static bool
48044 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
48046 unsigned i, which, nelt = d->nelt;
48047 struct expand_vec_perm_d dcopy, dcopy1;
48048 machine_mode vmode = d->vmode;
48049 bool ok;
48051 /* Use the same checks as in expand_vec_perm_blend. */
48052 if (d->one_operand_p)
48053 return false;
48054 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48056 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48058 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48060 else
48061 return false;
48063 /* Figure out which permutation elements do not stay in their
48064 respective lanes. */
48065 for (i = 0, which = 0; i < nelt; ++i)
48067 unsigned e = d->perm[i];
48068 if (e != i)
48069 which |= (e < nelt ? 1 : 2);
48071 /* We can pblend the part where elements do not stay in their
48072 respective lanes only when these elements all come from the same
48073 half of the permutation (all indices < nelt, or all >= nelt).
48074 {0 1 8 3 4 5 9 7} is ok, as 8 and 9 are not in their respective
48075 lanes but both 8 and 9 >= 8;
48076 {0 1 8 3 4 5 2 7} is not ok, as 2 and 8 are not in their
48077 respective lanes and 8 >= 8 but 2 is not. */
48078 if (which != 1 && which != 2)
48079 return false;
48080 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
48081 return true;
48083 /* First we apply one operand permutation to the part where
48084 elements stay not in their respective lanes. */
48085 dcopy = *d;
48086 if (which == 2)
48087 dcopy.op0 = dcopy.op1 = d->op1;
48088 else
48089 dcopy.op0 = dcopy.op1 = d->op0;
48090 if (!d->testing_p)
48091 dcopy.target = gen_reg_rtx (vmode);
48092 dcopy.one_operand_p = true;
48094 for (i = 0; i < nelt; ++i)
48095 dcopy.perm[i] = d->perm[i] & (nelt - 1);
48097 ok = expand_vec_perm_1 (&dcopy);
48098 if (GET_MODE_SIZE (vmode) != 16 && !ok)
48099 return false;
48100 else
48101 gcc_assert (ok);
48102 if (d->testing_p)
48103 return true;
48105 /* Next we put permuted elements into their positions. */
48106 dcopy1 = *d;
48107 if (which == 2)
48108 dcopy1.op1 = dcopy.target;
48109 else
48110 dcopy1.op0 = dcopy.target;
48112 for (i = 0; i < nelt; ++i)
48113 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
48115 ok = expand_vec_perm_blend (&dcopy1);
48116 gcc_assert (ok);
48118 return true;
48121 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
48123 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48124 a two vector permutation into a single vector permutation by using
48125 an interleave operation to merge the vectors. */
48127 static bool
48128 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
48130 struct expand_vec_perm_d dremap, dfinal;
48131 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
48132 unsigned HOST_WIDE_INT contents;
48133 unsigned char remap[2 * MAX_VECT_LEN];
48134 rtx_insn *seq;
48135 bool ok, same_halves = false;
48137 if (GET_MODE_SIZE (d->vmode) == 16)
48139 if (d->one_operand_p)
48140 return false;
48142 else if (GET_MODE_SIZE (d->vmode) == 32)
48144 if (!TARGET_AVX)
48145 return false;
48146 /* For 32-byte modes allow even d->one_operand_p.
48147 The lack of cross-lane shuffling in some instructions
48148 might prevent a single insn shuffle. */
48149 dfinal = *d;
48150 dfinal.testing_p = true;
48151 /* If expand_vec_perm_interleave3 can expand this into
48152 a 3 insn sequence, give up and let it be expanded as
48153 a 3 insn sequence. While that is one insn longer,
48154 it doesn't need a memory operand, and in the common
48155 case that both the interleave low and high permutations
48156 with the same operands are adjacent, the pair needs
48157 only 4 insns in total after CSE. */
48158 if (expand_vec_perm_interleave3 (&dfinal))
48159 return false;
48161 else
48162 return false;
48164 /* Examine from whence the elements come. */
48165 contents = 0;
48166 for (i = 0; i < nelt; ++i)
48167 contents |= HOST_WIDE_INT_1U << d->perm[i];
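/* CONTENTS now has one bit set for every source position in [0, 2*nelt)
that the permutation references. */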
48169 memset (remap, 0xff, sizeof (remap));
48170 dremap = *d;
48172 if (GET_MODE_SIZE (d->vmode) == 16)
48174 unsigned HOST_WIDE_INT h1, h2, h3, h4;
48176 /* Split the two input vectors into 4 halves. */
48177 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
48178 h2 = h1 << nelt2;
48179 h3 = h2 << nelt2;
48180 h4 = h3 << nelt2;
48182 /* If the elements are from the low halves, use interleave low; similarly
48183 for interleave high. If the elements are from mis-matched halves, we
48184 can use shufps for V4SF/V4SI or do a DImode shuffle. */
48185 if ((contents & (h1 | h3)) == contents)
48187 /* punpckl* */
48188 for (i = 0; i < nelt2; ++i)
48190 remap[i] = i * 2;
48191 remap[i + nelt] = i * 2 + 1;
48192 dremap.perm[i * 2] = i;
48193 dremap.perm[i * 2 + 1] = i + nelt;
48195 if (!TARGET_SSE2 && d->vmode == V4SImode)
48196 dremap.vmode = V4SFmode;
48198 else if ((contents & (h2 | h4)) == contents)
48200 /* punpckh* */
48201 for (i = 0; i < nelt2; ++i)
48203 remap[i + nelt2] = i * 2;
48204 remap[i + nelt + nelt2] = i * 2 + 1;
48205 dremap.perm[i * 2] = i + nelt2;
48206 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
48208 if (!TARGET_SSE2 && d->vmode == V4SImode)
48209 dremap.vmode = V4SFmode;
48211 else if ((contents & (h1 | h4)) == contents)
48213 /* shufps */
48214 for (i = 0; i < nelt2; ++i)
48216 remap[i] = i;
48217 remap[i + nelt + nelt2] = i + nelt2;
48218 dremap.perm[i] = i;
48219 dremap.perm[i + nelt2] = i + nelt + nelt2;
48221 if (nelt != 4)
48223 /* shufpd */
48224 dremap.vmode = V2DImode;
48225 dremap.nelt = 2;
48226 dremap.perm[0] = 0;
48227 dremap.perm[1] = 3;
48230 else if ((contents & (h2 | h3)) == contents)
48232 /* shufps */
48233 for (i = 0; i < nelt2; ++i)
48235 remap[i + nelt2] = i;
48236 remap[i + nelt] = i + nelt2;
48237 dremap.perm[i] = i + nelt2;
48238 dremap.perm[i + nelt2] = i + nelt;
48240 if (nelt != 4)
48242 /* shufpd */
48243 dremap.vmode = V2DImode;
48244 dremap.nelt = 2;
48245 dremap.perm[0] = 1;
48246 dremap.perm[1] = 2;
48249 else
48250 return false;
48252 else
48254 unsigned int nelt4 = nelt / 4, nzcnt = 0;
48255 unsigned HOST_WIDE_INT q[8];
48256 unsigned int nonzero_halves[4];
48258 /* Split the two input vectors into 8 quarters. */
48259 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
48260 for (i = 1; i < 8; ++i)
48261 q[i] = q[0] << (nelt4 * i);
48262 for (i = 0; i < 4; ++i)
48263 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
48265 nonzero_halves[nzcnt] = i;
48266 ++nzcnt;
48269 if (nzcnt == 1)
48271 gcc_assert (d->one_operand_p);
48272 nonzero_halves[1] = nonzero_halves[0];
48273 same_halves = true;
48275 else if (d->one_operand_p)
48277 gcc_assert (nonzero_halves[0] == 0);
48278 gcc_assert (nonzero_halves[1] == 1);
48281 if (nzcnt <= 2)
48283 if (d->perm[0] / nelt2 == nonzero_halves[1])
48285 /* Attempt to increase the likelihood that dfinal
48286 shuffle will be intra-lane. */
48287 std::swap (nonzero_halves[0], nonzero_halves[1]);
48290 /* vperm2f128 or vperm2i128. */
48291 for (i = 0; i < nelt2; ++i)
48293 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
48294 remap[i + nonzero_halves[0] * nelt2] = i;
48295 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48296 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48299 if (d->vmode != V8SFmode
48300 && d->vmode != V4DFmode
48301 && d->vmode != V8SImode)
48303 dremap.vmode = V8SImode;
48304 dremap.nelt = 8;
48305 for (i = 0; i < 4; ++i)
48307 dremap.perm[i] = i + nonzero_halves[0] * 4;
48308 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48312 else if (d->one_operand_p)
48313 return false;
48314 else if (TARGET_AVX2
48315 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48317 /* vpunpckl* */
48318 for (i = 0; i < nelt4; ++i)
48320 remap[i] = i * 2;
48321 remap[i + nelt] = i * 2 + 1;
48322 remap[i + nelt2] = i * 2 + nelt2;
48323 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48324 dremap.perm[i * 2] = i;
48325 dremap.perm[i * 2 + 1] = i + nelt;
48326 dremap.perm[i * 2 + nelt2] = i + nelt2;
48327 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48330 else if (TARGET_AVX2
48331 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48333 /* vpunpckh* */
48334 for (i = 0; i < nelt4; ++i)
48336 remap[i + nelt4] = i * 2;
48337 remap[i + nelt + nelt4] = i * 2 + 1;
48338 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48339 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48340 dremap.perm[i * 2] = i + nelt4;
48341 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48342 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48343 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48346 else
48347 return false;
48350 /* Use the remapping array set up above to move the elements from their
48351 swizzled locations into their final destinations. */
48352 dfinal = *d;
48353 for (i = 0; i < nelt; ++i)
48355 unsigned e = remap[d->perm[i]];
48356 gcc_assert (e < nelt);
48357 /* If same_halves is true, both halves of the remapped vector are the
48358 same. Avoid cross-lane accesses if possible. */
48359 if (same_halves && i >= nelt2)
48361 gcc_assert (e < nelt2);
48362 dfinal.perm[i] = e + nelt2;
48364 else
48365 dfinal.perm[i] = e;
48367 if (!d->testing_p)
48369 dremap.target = gen_reg_rtx (dremap.vmode);
48370 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48372 dfinal.op1 = dfinal.op0;
48373 dfinal.one_operand_p = true;
48375 /* Test if the final remap can be done with a single insn. For V4SFmode or
48376 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48377 start_sequence ();
48378 ok = expand_vec_perm_1 (&dfinal);
48379 seq = get_insns ();
48380 end_sequence ();
48382 if (!ok)
48383 return false;
48385 if (d->testing_p)
48386 return true;
48388 if (dremap.vmode != dfinal.vmode)
48390 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48391 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48394 ok = expand_vec_perm_1 (&dremap);
48395 gcc_assert (ok);
48397 emit_insn (seq);
48398 return true;
48401 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48402 a single vector cross-lane permutation into vpermq followed
48403 by any of the single insn permutations. */
48405 static bool
48406 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48408 struct expand_vec_perm_d dremap, dfinal;
48409 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48410 unsigned contents[2];
48411 bool ok;
48413 if (!(TARGET_AVX2
48414 && (d->vmode == V32QImode || d->vmode == V16HImode)
48415 && d->one_operand_p))
48416 return false;
48418 contents[0] = 0;
48419 contents[1] = 0;
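/* contents[n] collects which of the four source quarters feed half n of the
result; each half of the V4DImode vpermq below can hold at most two quarters,
so more than two means failure. */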
48420 for (i = 0; i < nelt2; ++i)
48422 contents[0] |= 1u << (d->perm[i] / nelt4);
48423 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48426 for (i = 0; i < 2; ++i)
48428 unsigned int cnt = 0;
48429 for (j = 0; j < 4; ++j)
48430 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48431 return false;
48434 if (d->testing_p)
48435 return true;
48437 dremap = *d;
48438 dremap.vmode = V4DImode;
48439 dremap.nelt = 4;
48440 dremap.target = gen_reg_rtx (V4DImode);
48441 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48442 dremap.op1 = dremap.op0;
48443 dremap.one_operand_p = true;
48444 for (i = 0; i < 2; ++i)
48446 unsigned int cnt = 0;
48447 for (j = 0; j < 4; ++j)
48448 if ((contents[i] & (1u << j)) != 0)
48449 dremap.perm[2 * i + cnt++] = j;
48450 for (; cnt < 2; ++cnt)
48451 dremap.perm[2 * i + cnt] = 0;
48454 dfinal = *d;
48455 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48456 dfinal.op1 = dfinal.op0;
48457 dfinal.one_operand_p = true;
48458 for (i = 0, j = 0; i < nelt; ++i)
48460 if (i == nelt2)
48461 j = 2;
48462 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48463 if ((d->perm[i] / nelt4) == dremap.perm[j])
48465 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48466 dfinal.perm[i] |= nelt4;
48467 else
48468 gcc_unreachable ();
48471 ok = expand_vec_perm_1 (&dremap);
48472 gcc_assert (ok);
48474 ok = expand_vec_perm_1 (&dfinal);
48475 gcc_assert (ok);
48477 return true;
48480 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48481 a vector permutation using two instructions, vperm2f128 resp.
48482 vperm2i128 followed by any single in-lane permutation. */
48484 static bool
48485 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48487 struct expand_vec_perm_d dfirst, dsecond;
48488 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48489 bool ok;
48491 if (!TARGET_AVX
48492 || GET_MODE_SIZE (d->vmode) != 32
48493 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48494 return false;
48496 dsecond = *d;
48497 dsecond.one_operand_p = false;
48498 dsecond.testing_p = true;
48500 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48501 immediate. For perm < 16 the second permutation uses
48502 d->op0 as first operand, for perm >= 16 it uses d->op1
48503 as first operand. The second operand is the result of
48504 vperm2[fi]128. */
48505 for (perm = 0; perm < 32; perm++)
48507 /* Ignore permutations which do not move anything cross-lane. */
48508 if (perm < 16)
48510 /* The second shuffle for e.g. V4DFmode has
48511 0123 and ABCD operands.
48512 Ignore AB23, as 23 is already in the second lane
48513 of the first operand. */
48514 if ((perm & 0xc) == (1 << 2)) continue;
48515 /* And 01CD, as 01 is in the first lane of the first
48516 operand. */
48517 if ((perm & 3) == 0) continue;
48518 /* And 4567, as then the vperm2[fi]128 doesn't change
48519 anything on the original 4567 second operand. */
48520 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48522 else
48524 /* The second shuffle for e.g. V4DFmode has
48525 4567 and ABCD operands.
48526 Ignore AB67, as 67 is already in the second lane
48527 of the first operand. */
48528 if ((perm & 0xc) == (3 << 2)) continue;
48529 /* And 45CD, as 45 is in the first lane of the first
48530 operand. */
48531 if ((perm & 3) == 2) continue;
48532 /* And 0123, as then the vperm2[fi]128 doesn't change
48533 anything on the original 0123 first operand. */
48534 if ((perm & 0xf) == (1 << 2)) continue;
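/* See whether the rest of the permutation can be done as an in-lane
shuffle of one original operand and the vperm2[fi]128 result selected by
PERM; stop early if some element cannot be placed that way. */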
48537 for (i = 0; i < nelt; i++)
48539 j = d->perm[i] / nelt2;
48540 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48541 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48542 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48543 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48544 else
48545 break;
48548 if (i == nelt)
48550 start_sequence ();
48551 ok = expand_vec_perm_1 (&dsecond);
48552 end_sequence ();
48554 else
48555 ok = false;
48557 if (ok)
48559 if (d->testing_p)
48560 return true;
48562 /* Found a usable second shuffle. dfirst will be
48563 vperm2f128 on d->op0 and d->op1. */
48564 dsecond.testing_p = false;
48565 dfirst = *d;
48566 dfirst.target = gen_reg_rtx (d->vmode);
48567 for (i = 0; i < nelt; i++)
48568 dfirst.perm[i] = (i & (nelt2 - 1))
48569 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48571 canonicalize_perm (&dfirst);
48572 ok = expand_vec_perm_1 (&dfirst);
48573 gcc_assert (ok);
48575 /* And dsecond is some single insn shuffle, taking
48576 d->op0 and result of vperm2f128 (if perm < 16) or
48577 d->op1 and result of vperm2f128 (otherwise). */
48578 if (perm >= 16)
48579 dsecond.op0 = dsecond.op1;
48580 dsecond.op1 = dfirst.target;
48582 ok = expand_vec_perm_1 (&dsecond);
48583 gcc_assert (ok);
48585 return true;
48588 /* For one operand, the only useful vperm2f128 permutation is 0x01,
48589 i.e. swapping the lanes. */
48590 if (d->one_operand_p)
48591 return false;
48594 return false;
48597 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48598 a two vector permutation using 2 intra-lane interleave insns
48599 and cross-lane shuffle for 32-byte vectors. */
48601 static bool
48602 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48604 unsigned i, nelt;
48605 rtx (*gen) (rtx, rtx, rtx);
48607 if (d->one_operand_p)
48608 return false;
48609 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48611 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48613 else
48614 return false;
48616 nelt = d->nelt;
48617 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48618 return false;
48619 for (i = 0; i < nelt; i += 2)
48620 if (d->perm[i] != d->perm[0] + i / 2
48621 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48622 return false;
48624 if (d->testing_p)
48625 return true;
48627 switch (d->vmode)
48629 case V32QImode:
48630 if (d->perm[0])
48631 gen = gen_vec_interleave_highv32qi;
48632 else
48633 gen = gen_vec_interleave_lowv32qi;
48634 break;
48635 case V16HImode:
48636 if (d->perm[0])
48637 gen = gen_vec_interleave_highv16hi;
48638 else
48639 gen = gen_vec_interleave_lowv16hi;
48640 break;
48641 case V8SImode:
48642 if (d->perm[0])
48643 gen = gen_vec_interleave_highv8si;
48644 else
48645 gen = gen_vec_interleave_lowv8si;
48646 break;
48647 case V4DImode:
48648 if (d->perm[0])
48649 gen = gen_vec_interleave_highv4di;
48650 else
48651 gen = gen_vec_interleave_lowv4di;
48652 break;
48653 case V8SFmode:
48654 if (d->perm[0])
48655 gen = gen_vec_interleave_highv8sf;
48656 else
48657 gen = gen_vec_interleave_lowv8sf;
48658 break;
48659 case V4DFmode:
48660 if (d->perm[0])
48661 gen = gen_vec_interleave_highv4df;
48662 else
48663 gen = gen_vec_interleave_lowv4df;
48664 break;
48665 default:
48666 gcc_unreachable ();
48669 emit_insn (gen (d->target, d->op0, d->op1));
48670 return true;
48673 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48674 a single vector permutation using a single intra-lane vector
48675 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48676 the non-swapped and swapped vectors together. */
48678 static bool
48679 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48681 struct expand_vec_perm_d dfirst, dsecond;
48682 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48683 rtx_insn *seq;
48684 bool ok;
48685 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48687 if (!TARGET_AVX
48688 || TARGET_AVX2
48689 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48690 || !d->one_operand_p)
48691 return false;
48693 dfirst = *d;
48694 for (i = 0; i < nelt; i++)
48695 dfirst.perm[i] = 0xff;
48696 for (i = 0, msk = 0; i < nelt; i++)
48698 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48699 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48700 return false;
48701 dfirst.perm[j] = d->perm[i];
48702 if (j != i)
48703 msk |= (1 << i);
48705 for (i = 0; i < nelt; i++)
48706 if (dfirst.perm[i] == 0xff)
48707 dfirst.perm[i] = i;
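/* MSK has a bit set for every element that must come from the lane-swapped
copy; it is used below as the vblendps/vblendpd immediate. */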
48709 if (!d->testing_p)
48710 dfirst.target = gen_reg_rtx (dfirst.vmode);
48712 start_sequence ();
48713 ok = expand_vec_perm_1 (&dfirst);
48714 seq = get_insns ();
48715 end_sequence ();
48717 if (!ok)
48718 return false;
48720 if (d->testing_p)
48721 return true;
48723 emit_insn (seq);
48725 dsecond = *d;
48726 dsecond.op0 = dfirst.target;
48727 dsecond.op1 = dfirst.target;
48728 dsecond.one_operand_p = true;
48729 dsecond.target = gen_reg_rtx (dsecond.vmode);
48730 for (i = 0; i < nelt; i++)
48731 dsecond.perm[i] = i ^ nelt2;
48733 ok = expand_vec_perm_1 (&dsecond);
48734 gcc_assert (ok);
48736 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48737 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48738 return true;
48741 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48742 permutation using two vperm2f128, followed by a vshufpd insn blending
48743 the two vectors together. */
48745 static bool
48746 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48748 struct expand_vec_perm_d dfirst, dsecond, dthird;
48749 bool ok;
48751 if (!TARGET_AVX || (d->vmode != V4DFmode))
48752 return false;
48754 if (d->testing_p)
48755 return true;
48757 dfirst = *d;
48758 dsecond = *d;
48759 dthird = *d;
48761 dfirst.perm[0] = (d->perm[0] & ~1);
48762 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48763 dfirst.perm[2] = (d->perm[2] & ~1);
48764 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48765 dsecond.perm[0] = (d->perm[1] & ~1);
48766 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48767 dsecond.perm[2] = (d->perm[3] & ~1);
48768 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48769 dthird.perm[0] = (d->perm[0] % 2);
48770 dthird.perm[1] = (d->perm[1] % 2) + 4;
48771 dthird.perm[2] = (d->perm[2] % 2) + 2;
48772 dthird.perm[3] = (d->perm[3] % 2) + 6;
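/* DFIRST and DSECOND use vperm2f128 to line up, per lane, the source pairs
that contain the required elements; DTHIRD then picks one element from each
pair per lane, which matches the vshufpd pattern. */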
48774 dfirst.target = gen_reg_rtx (dfirst.vmode);
48775 dsecond.target = gen_reg_rtx (dsecond.vmode);
48776 dthird.op0 = dfirst.target;
48777 dthird.op1 = dsecond.target;
48778 dthird.one_operand_p = false;
48780 canonicalize_perm (&dfirst);
48781 canonicalize_perm (&dsecond);
48783 ok = expand_vec_perm_1 (&dfirst)
48784 && expand_vec_perm_1 (&dsecond)
48785 && expand_vec_perm_1 (&dthird);
48787 gcc_assert (ok);
48789 return true;
48792 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48793 permutation with two pshufb insns and an ior. We should have already
48794 failed all two instruction sequences. */
48796 static bool
48797 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48799 rtx rperm[2][16], vperm, l, h, op, m128;
48800 unsigned int i, nelt, eltsz;
48802 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48803 return false;
48804 gcc_assert (!d->one_operand_p);
48806 if (d->testing_p)
48807 return true;
48809 nelt = d->nelt;
48810 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48812 /* Generate two permutation masks. If the required element is within
48813 the given vector it is shuffled into the proper lane. If the required
48814 element is in the other vector, force a zero into the lane by setting
48815 bit 7 in the permutation mask. */
48816 m128 = GEN_INT (-128);
48817 for (i = 0; i < nelt; ++i)
48819 unsigned j, e = d->perm[i];
48820 unsigned which = (e >= nelt);
48821 if (e >= nelt)
48822 e -= nelt;
48824 for (j = 0; j < eltsz; ++j)
48826 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48827 rperm[1-which][i*eltsz + j] = m128;
48831 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48832 vperm = force_reg (V16QImode, vperm);
48834 l = gen_reg_rtx (V16QImode);
48835 op = gen_lowpart (V16QImode, d->op0);
48836 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48838 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48839 vperm = force_reg (V16QImode, vperm);
48841 h = gen_reg_rtx (V16QImode);
48842 op = gen_lowpart (V16QImode, d->op1);
48843 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48845 op = d->target;
48846 if (d->vmode != V16QImode)
48847 op = gen_reg_rtx (V16QImode);
48848 emit_insn (gen_iorv16qi3 (op, l, h));
48849 if (op != d->target)
48850 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48852 return true;
48855 /* Implement an arbitrary permutation of a single V32QImode or V16HImode
48856 operand with two vpshufb insns, vpermq and vpor. We should have already
48857 failed all two or three instruction sequences. */
48859 static bool
48860 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48862 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48863 unsigned int i, nelt, eltsz;
48865 if (!TARGET_AVX2
48866 || !d->one_operand_p
48867 || (d->vmode != V32QImode && d->vmode != V16HImode))
48868 return false;
48870 if (d->testing_p)
48871 return true;
48873 nelt = d->nelt;
48874 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48876 /* Generate two permutation masks. If the required element is within
48877 the same lane, it is shuffled in. If the required element is from the
48878 other lane, force a zero by setting bit 7 in the permutation mask.
48879 The other mask has a non-negative element where an element is
48880 requested from the other lane, but places it into the other lane as
48881 well, so that the result of vpshufb can have its two V2TImode halves
48882 swapped. */
48883 m128 = GEN_INT (-128);
48884 for (i = 0; i < nelt; ++i)
48886 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48887 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48889 for (j = 0; j < eltsz; ++j)
48891 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48892 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48896 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48897 vperm = force_reg (V32QImode, vperm);
48899 h = gen_reg_rtx (V32QImode);
48900 op = gen_lowpart (V32QImode, d->op0);
48901 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48903 /* Swap the 128-bit lanes of h into hp. */
48904 hp = gen_reg_rtx (V4DImode);
48905 op = gen_lowpart (V4DImode, h);
48906 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48907 const1_rtx));
48909 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48910 vperm = force_reg (V32QImode, vperm);
48912 l = gen_reg_rtx (V32QImode);
48913 op = gen_lowpart (V32QImode, d->op0);
48914 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48916 op = d->target;
48917 if (d->vmode != V32QImode)
48918 op = gen_reg_rtx (V32QImode);
48919 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48920 if (op != d->target)
48921 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48923 return true;
48926 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48927 and extract-odd permutations of two V32QImode or V16HImode operands
48928 with two vpshufb insns, vpor and vpermq. We should have already
48929 failed all two or three instruction sequences. */
48931 static bool
48932 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48934 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48935 unsigned int i, nelt, eltsz;
48937 if (!TARGET_AVX2
48938 || d->one_operand_p
48939 || (d->vmode != V32QImode && d->vmode != V16HImode))
48940 return false;
48942 for (i = 0; i < d->nelt; ++i)
48943 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48944 return false;
48946 if (d->testing_p)
48947 return true;
48949 nelt = d->nelt;
48950 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48952 /* Generate two permutation masks. In the first permutation mask
48953 the first quarter will contain indexes for the first half
48954 of the op0, the second quarter will contain bit 7 set, third quarter
48955 will contain indexes for the second half of the op0 and the
48956 last quarter bit 7 set. In the second permutation mask
48957 the first quarter will contain bit 7 set, the second quarter
48958 indexes for the first half of the op1, the third quarter bit 7 set
48959 and last quarter indexes for the second half of the op1.
48960 I.e. the first mask e.g. for V32QImode extract even will be:
48961 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48962 (all values masked with 0xf except for -128) and second mask
48963 for extract even will be
48964 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48965 m128 = GEN_INT (-128);
48966 for (i = 0; i < nelt; ++i)
48968 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48969 unsigned which = d->perm[i] >= nelt;
48970 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48972 for (j = 0; j < eltsz; ++j)
48974 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48975 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48979 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48980 vperm = force_reg (V32QImode, vperm);
48982 l = gen_reg_rtx (V32QImode);
48983 op = gen_lowpart (V32QImode, d->op0);
48984 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48986 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48987 vperm = force_reg (V32QImode, vperm);
48989 h = gen_reg_rtx (V32QImode);
48990 op = gen_lowpart (V32QImode, d->op1);
48991 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48993 ior = gen_reg_rtx (V32QImode);
48994 emit_insn (gen_iorv32qi3 (ior, l, h));
48996 /* Permute the V4DImode quarters using the { 0, 2, 1, 3 } permutation. */
48997 op = gen_reg_rtx (V4DImode);
48998 ior = gen_lowpart (V4DImode, ior);
48999 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
49000 const1_rtx, GEN_INT (3)));
49001 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49003 return true;
49006 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
49007 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
49008 with two "and" and "pack" or two "shift" and "pack" insns. We should
49009 have already failed all two instruction sequences. */
49011 static bool
49012 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
49014 rtx op, dop0, dop1, t, rperm[16];
49015 unsigned i, odd, c, s, nelt = d->nelt;
49016 bool end_perm = false;
49017 machine_mode half_mode;
49018 rtx (*gen_and) (rtx, rtx, rtx);
49019 rtx (*gen_pack) (rtx, rtx, rtx);
49020 rtx (*gen_shift) (rtx, rtx, rtx);
49022 if (d->one_operand_p)
49023 return false;
49025 switch (d->vmode)
49027 case V8HImode:
49028 /* Required for "pack". */
49029 if (!TARGET_SSE4_1)
49030 return false;
49031 c = 0xffff;
49032 s = 16;
49033 half_mode = V4SImode;
49034 gen_and = gen_andv4si3;
49035 gen_pack = gen_sse4_1_packusdw;
49036 gen_shift = gen_lshrv4si3;
49037 break;
49038 case V16QImode:
49039 /* No check as all instructions are SSE2. */
49040 c = 0xff;
49041 s = 8;
49042 half_mode = V8HImode;
49043 gen_and = gen_andv8hi3;
49044 gen_pack = gen_sse2_packuswb;
49045 gen_shift = gen_lshrv8hi3;
49046 break;
49047 case V16HImode:
49048 if (!TARGET_AVX2)
49049 return false;
49050 c = 0xffff;
49051 s = 16;
49052 half_mode = V8SImode;
49053 gen_and = gen_andv8si3;
49054 gen_pack = gen_avx2_packusdw;
49055 gen_shift = gen_lshrv8si3;
49056 end_perm = true;
49057 break;
49058 case V32QImode:
49059 if (!TARGET_AVX2)
49060 return false;
49061 c = 0xff;
49062 s = 8;
49063 half_mode = V16HImode;
49064 gen_and = gen_andv16hi3;
49065 gen_pack = gen_avx2_packuswb;
49066 gen_shift = gen_lshrv16hi3;
49067 end_perm = true;
49068 break;
49069 default:
49070 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
49071 general shuffles. */
49072 return false;
49075 /* Check that permutation is even or odd. */
49076 odd = d->perm[0];
49077 if (odd > 1)
49078 return false;
49080 for (i = 1; i < nelt; ++i)
49081 if (d->perm[i] != 2 * i + odd)
49082 return false;
49084 if (d->testing_p)
49085 return true;
49087 dop0 = gen_reg_rtx (half_mode);
49088 dop1 = gen_reg_rtx (half_mode);
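/* For the even permutation, mask off the high part of each wide element;
for the odd permutation, shift it down. The pack insn then narrows the two
intermediate vectors into the result. */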
49089 if (odd == 0)
49091 for (i = 0; i < nelt / 2; i++)
49092 rperm[i] = GEN_INT (c);
49093 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
49094 t = force_reg (half_mode, t);
49095 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
49096 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
49098 else
49100 emit_insn (gen_shift (dop0,
49101 gen_lowpart (half_mode, d->op0),
49102 GEN_INT (s)));
49103 emit_insn (gen_shift (dop1,
49104 gen_lowpart (half_mode, d->op1),
49105 GEN_INT (s)));
49107 /* In the AVX2 256-bit case we need to permute the pack result. */
49108 if (TARGET_AVX2 && end_perm)
49110 op = gen_reg_rtx (d->vmode);
49111 t = gen_reg_rtx (V4DImode);
49112 emit_insn (gen_pack (op, dop0, dop1));
49113 emit_insn (gen_avx2_permv4di_1 (t,
49114 gen_lowpart (V4DImode, op),
49115 const0_rtx,
49116 const2_rtx,
49117 const1_rtx,
49118 GEN_INT (3)));
49119 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
49121 else
49122 emit_insn (gen_pack (d->target, dop0, dop1));
49124 return true;
49127 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
49128 and extract-odd permutations. */
49130 static bool
49131 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
49133 rtx t1, t2, t3, t4, t5;
49135 switch (d->vmode)
49137 case V4DFmode:
49138 if (d->testing_p)
49139 break;
49140 t1 = gen_reg_rtx (V4DFmode);
49141 t2 = gen_reg_rtx (V4DFmode);
49143 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49144 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
49145 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
49147 /* Now an unpck[lh]pd will produce the result required. */
49148 if (odd)
49149 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
49150 else
49151 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
49152 emit_insn (t3);
49153 break;
49155 case V8SFmode:
49157 int mask = odd ? 0xdd : 0x88;
49159 if (d->testing_p)
49160 break;
49161 t1 = gen_reg_rtx (V8SFmode);
49162 t2 = gen_reg_rtx (V8SFmode);
49163 t3 = gen_reg_rtx (V8SFmode);
49165 /* Shuffle within the 128-bit lanes to produce:
49166 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
49167 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
49168 GEN_INT (mask)));
49170 /* Shuffle the lanes around to produce:
49171 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
49172 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
49173 GEN_INT (0x3)));
49175 /* Shuffle within the 128-bit lanes to produce:
49176 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
49177 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
49179 /* Shuffle within the 128-bit lanes to produce:
49180 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
49181 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
49183 /* Shuffle the lanes around to produce:
49184 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
49185 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
49186 GEN_INT (0x20)));
49188 break;
49190 case V2DFmode:
49191 case V4SFmode:
49192 case V2DImode:
49193 case V4SImode:
49194 /* These are always directly implementable by expand_vec_perm_1. */
49195 gcc_unreachable ();
49197 case V8HImode:
49198 if (TARGET_SSE4_1)
49199 return expand_vec_perm_even_odd_pack (d);
49200 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
49201 return expand_vec_perm_pshufb2 (d);
49202 else
49204 if (d->testing_p)
49205 break;
49206 /* We need 2*log2(N)-1 operations to achieve odd/even
49207 with interleave. */
49208 t1 = gen_reg_rtx (V8HImode);
49209 t2 = gen_reg_rtx (V8HImode);
49210 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
49211 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
49212 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
49213 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
49214 if (odd)
49215 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
49216 else
49217 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
49218 emit_insn (t3);
49220 break;
49222 case V16QImode:
49223 return expand_vec_perm_even_odd_pack (d);
49225 case V16HImode:
49226 case V32QImode:
49227 return expand_vec_perm_even_odd_pack (d);
49229 case V4DImode:
49230 if (!TARGET_AVX2)
49232 struct expand_vec_perm_d d_copy = *d;
49233 d_copy.vmode = V4DFmode;
49234 if (d->testing_p)
49235 d_copy.target = gen_lowpart (V4DFmode, d->target);
49236 else
49237 d_copy.target = gen_reg_rtx (V4DFmode);
49238 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
49239 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
49240 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49242 if (!d->testing_p)
49243 emit_move_insn (d->target,
49244 gen_lowpart (V4DImode, d_copy.target));
49245 return true;
49247 return false;
49250 if (d->testing_p)
49251 break;
49253 t1 = gen_reg_rtx (V4DImode);
49254 t2 = gen_reg_rtx (V4DImode);
49256 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49257 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
49258 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
49260 /* Now a vpunpck[lh]qdq will produce the result required. */
49261 if (odd)
49262 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
49263 else
49264 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
49265 emit_insn (t3);
49266 break;
49268 case V8SImode:
49269 if (!TARGET_AVX2)
49271 struct expand_vec_perm_d d_copy = *d;
49272 d_copy.vmode = V8SFmode;
49273 if (d->testing_p)
49274 d_copy.target = gen_lowpart (V8SFmode, d->target);
49275 else
49276 d_copy.target = gen_reg_rtx (V8SFmode);
49277 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
49278 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
49279 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49281 if (!d->testing_p)
49282 emit_move_insn (d->target,
49283 gen_lowpart (V8SImode, d_copy.target));
49284 return true;
49286 return false;
49289 if (d->testing_p)
49290 break;
49292 t1 = gen_reg_rtx (V8SImode);
49293 t2 = gen_reg_rtx (V8SImode);
49294 t3 = gen_reg_rtx (V4DImode);
49295 t4 = gen_reg_rtx (V4DImode);
49296 t5 = gen_reg_rtx (V4DImode);
49298 /* Shuffle the lanes around into
49299 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
49300 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49301 gen_lowpart (V4DImode, d->op1),
49302 GEN_INT (0x20)));
49303 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49304 gen_lowpart (V4DImode, d->op1),
49305 GEN_INT (0x31)));
49307 /* Swap the 2nd and 3rd position in each lane into
49308 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49309 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49310 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49311 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49312 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49314 /* Now a vpunpck[lh]qdq will produce
49315 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
49316 if (odd)
49317 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49318 gen_lowpart (V4DImode, t2));
49319 else
49320 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49321 gen_lowpart (V4DImode, t2));
49322 emit_insn (t3);
49323 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49324 break;
49326 default:
49327 gcc_unreachable ();
49330 return true;
49333 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49334 extract-even and extract-odd permutations. */
49336 static bool
49337 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49339 unsigned i, odd, nelt = d->nelt;
49341 odd = d->perm[0];
49342 if (odd != 0 && odd != 1)
49343 return false;
49345 for (i = 1; i < nelt; ++i)
49346 if (d->perm[i] != 2 * i + odd)
49347 return false;
49349 return expand_vec_perm_even_odd_1 (d, odd);
49352 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
49353 permutations. We assume that expand_vec_perm_1 has already failed. */
49355 static bool
49356 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49358 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49359 machine_mode vmode = d->vmode;
49360 unsigned char perm2[4];
49361 rtx op0 = d->op0, dest;
49362 bool ok;
49364 switch (vmode)
49366 case V4DFmode:
49367 case V8SFmode:
49368 /* These are special-cased in sse.md so that we can optionally
49369 use the vbroadcast instruction. They expand to two insns
49370 if the input happens to be in a register. */
49371 gcc_unreachable ();
49373 case V2DFmode:
49374 case V2DImode:
49375 case V4SFmode:
49376 case V4SImode:
49377 /* These are always implementable using standard shuffle patterns. */
49378 gcc_unreachable ();
49380 case V8HImode:
49381 case V16QImode:
49382 /* These can be implemented via interleave. We save one insn by
49383 stopping once we have promoted to V4SImode and then using pshufd. */
49384 if (d->testing_p)
49385 return true;
49388 rtx dest;
49389 rtx (*gen) (rtx, rtx, rtx)
49390 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49391 : gen_vec_interleave_lowv8hi;
49393 if (elt >= nelt2)
49395 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49396 : gen_vec_interleave_highv8hi;
49397 elt -= nelt2;
49399 nelt2 /= 2;
49401 dest = gen_reg_rtx (vmode);
49402 emit_insn (gen (dest, op0, op0));
49403 vmode = get_mode_wider_vector (vmode);
49404 op0 = gen_lowpart (vmode, dest);
49406 while (vmode != V4SImode);
49408 memset (perm2, elt, 4);
49409 dest = gen_reg_rtx (V4SImode);
49410 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49411 gcc_assert (ok);
49412 if (!d->testing_p)
49413 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49414 return true;
49416 case V64QImode:
49417 case V32QImode:
49418 case V16HImode:
49419 case V8SImode:
49420 case V4DImode:
49421 /* For AVX2 broadcasts of the first element, vpbroadcast* or
49422 vpermq should be used by expand_vec_perm_1. */
49423 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49424 return false;
49426 default:
49427 gcc_unreachable ();
49431 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49432 broadcast permutations. */
49434 static bool
49435 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49437 unsigned i, elt, nelt = d->nelt;
49439 if (!d->one_operand_p)
49440 return false;
49442 elt = d->perm[0];
49443 for (i = 1; i < nelt; ++i)
49444 if (d->perm[i] != elt)
49445 return false;
49447 return expand_vec_perm_broadcast_1 (d);
49450 /* Implement arbitrary permutations of two V64QImode operands
49451 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49452 static bool
49453 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49455 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49456 return false;
49458 if (d->testing_p)
49459 return true;
49461 struct expand_vec_perm_d ds[2];
49462 rtx rperm[128], vperm, target0, target1;
49463 unsigned int i, nelt;
49464 machine_mode vmode;
49466 nelt = d->nelt;
49467 vmode = V64QImode;
49469 for (i = 0; i < 2; i++)
49471 ds[i] = *d;
49472 ds[i].vmode = V32HImode;
49473 ds[i].nelt = 32;
49474 ds[i].target = gen_reg_rtx (V32HImode);
49475 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49476 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49479 /* Prepare permutations such that the first one takes care of
49480 putting the even bytes into the right positions or one position
49481 higher (ds[0]), and the second one takes care of
49482 putting the odd bytes into the right positions or one position
49483 lower (ds[1]). */
49485 for (i = 0; i < nelt; i++)
49487 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49488 if (i & 1)
49490 rperm[i] = constm1_rtx;
49491 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49493 else
49495 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49496 rperm[i + 64] = constm1_rtx;
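/* A mask byte of -1 has bit 7 set, so vpshufb writes zero there; the two
shuffle results can therefore simply be combined with vpor. */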
49500 bool ok = expand_vec_perm_1 (&ds[0]);
49501 gcc_assert (ok);
49502 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49504 ok = expand_vec_perm_1 (&ds[1]);
49505 gcc_assert (ok);
49506 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49508 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49509 vperm = force_reg (vmode, vperm);
49510 target0 = gen_reg_rtx (V64QImode);
49511 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49513 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49514 vperm = force_reg (vmode, vperm);
49515 target1 = gen_reg_rtx (V64QImode);
49516 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49518 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49519 return true;
49522 /* Implement an arbitrary permutation of two V32QImode or V16HImode operands
49523 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49524 all the shorter instruction sequences. */
49526 static bool
49527 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49529 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49530 unsigned int i, nelt, eltsz;
49531 bool used[4];
49533 if (!TARGET_AVX2
49534 || d->one_operand_p
49535 || (d->vmode != V32QImode && d->vmode != V16HImode))
49536 return false;
49538 if (d->testing_p)
49539 return true;
49541 nelt = d->nelt;
49542 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49544 /* Generate 4 permutation masks. If the required element is within
49545 the same lane, it is shuffled in. If the required element is from the
49546 other lane, force a zero by setting bit 7 in the permutation mask.
49547 The other masks have a non-negative element where an element is
49548 requested from the other lane, but place it into the other lane as
49549 well, so that the result of vpshufb can have its two V2TImode halves
49550 swapped. */
49551 m128 = GEN_INT (-128);
49552 for (i = 0; i < 32; ++i)
49554 rperm[0][i] = m128;
49555 rperm[1][i] = m128;
49556 rperm[2][i] = m128;
49557 rperm[3][i] = m128;
49559 used[0] = false;
49560 used[1] = false;
49561 used[2] = false;
49562 used[3] = false;
49563 for (i = 0; i < nelt; ++i)
49565 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49566 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49567 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49569 for (j = 0; j < eltsz; ++j)
49570 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49571 used[which] = true;
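/* Masks 0 and 1 select from op0, masks 2 and 3 from op1; the odd-numbered
masks collect the elements that must cross lanes, and their vpshufb results
are lane-swapped with vpermq below. */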
49574 for (i = 0; i < 2; ++i)
49576 if (!used[2 * i + 1])
49578 h[i] = NULL_RTX;
49579 continue;
49581 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49582 gen_rtvec_v (32, rperm[2 * i + 1]));
49583 vperm = force_reg (V32QImode, vperm);
49584 h[i] = gen_reg_rtx (V32QImode);
49585 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49586 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49589 /* Swap the 128-bit lanes of h[X]. */
49590 for (i = 0; i < 2; ++i)
49592 if (h[i] == NULL_RTX)
49593 continue;
49594 op = gen_reg_rtx (V4DImode);
49595 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49596 const2_rtx, GEN_INT (3), const0_rtx,
49597 const1_rtx));
49598 h[i] = gen_lowpart (V32QImode, op);
49601 for (i = 0; i < 2; ++i)
49603 if (!used[2 * i])
49605 l[i] = NULL_RTX;
49606 continue;
49608 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49609 vperm = force_reg (V32QImode, vperm);
49610 l[i] = gen_reg_rtx (V32QImode);
49611 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49612 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49615 for (i = 0; i < 2; ++i)
49617 if (h[i] && l[i])
49619 op = gen_reg_rtx (V32QImode);
49620 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49621 l[i] = op;
49623 else if (h[i])
49624 l[i] = h[i];
49627 gcc_assert (l[0] && l[1]);
49628 op = d->target;
49629 if (d->vmode != V32QImode)
49630 op = gen_reg_rtx (V32QImode);
49631 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49632 if (op != d->target)
49633 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49634 return true;
49637 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49638 With all of the interface bits taken care of, perform the expansion
49639 in D and return true on success. */
49641 static bool
49642 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49644 /* Try a single instruction expansion. */
49645 if (expand_vec_perm_1 (d))
49646 return true;
49648 /* Try sequences of two instructions. */
49650 if (expand_vec_perm_pshuflw_pshufhw (d))
49651 return true;
49653 if (expand_vec_perm_palignr (d, false))
49654 return true;
49656 if (expand_vec_perm_interleave2 (d))
49657 return true;
49659 if (expand_vec_perm_broadcast (d))
49660 return true;
49662 if (expand_vec_perm_vpermq_perm_1 (d))
49663 return true;
49665 if (expand_vec_perm_vperm2f128 (d))
49666 return true;
49668 if (expand_vec_perm_pblendv (d))
49669 return true;
49671 /* Try sequences of three instructions. */
49673 if (expand_vec_perm_even_odd_pack (d))
49674 return true;
49676 if (expand_vec_perm_2vperm2f128_vshuf (d))
49677 return true;
49679 if (expand_vec_perm_pshufb2 (d))
49680 return true;
49682 if (expand_vec_perm_interleave3 (d))
49683 return true;
49685 if (expand_vec_perm_vperm2f128_vblend (d))
49686 return true;
49688 /* Try sequences of four instructions. */
49690 if (expand_vec_perm_vpshufb2_vpermq (d))
49691 return true;
49693 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49694 return true;
49696 if (expand_vec_perm_vpermi2_vpshub2 (d))
49697 return true;
49699 /* ??? Look for narrow permutations whose element orderings would
49700 allow the promotion to a wider mode. */
49702 /* ??? Look for sequences of interleave or a wider permute that place
49703 the data into the correct lanes for a half-vector shuffle like
49704 pshuf[lh]w or vpermilps. */
49706 /* ??? Look for sequences of interleave that produce the desired results.
49707 The combinatorics of punpck[lh] get pretty ugly... */
49709 if (expand_vec_perm_even_odd (d))
49710 return true;
49712 /* Even longer sequences. */
49713 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49714 return true;
49716 return false;
49719 /* If a permutation only uses one operand, make it clear. Returns true
49720 if the permutation references both operands. */
49722 static bool
49723 canonicalize_perm (struct expand_vec_perm_d *d)
49725 int i, which, nelt = d->nelt;
49727 for (i = which = 0; i < nelt; ++i)
49728 which |= (d->perm[i] < nelt ? 1 : 2);
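/* Bit 0 of WHICH is set if any element comes from the first operand,
bit 1 if any comes from the second. */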
49730 d->one_operand_p = true;
49731 switch (which)
49733 default:
49734 gcc_unreachable();
49736 case 3:
49737 if (!rtx_equal_p (d->op0, d->op1))
49739 d->one_operand_p = false;
49740 break;
49742 /* The elements of PERM do not suggest that only the first operand
49743 is used, but both operands are identical. Allow easier matching
49744 of the permutation by folding the permutation into the single
49745 input vector. */
49746 /* FALLTHRU */
49748 case 2:
49749 for (i = 0; i < nelt; ++i)
49750 d->perm[i] &= nelt - 1;
49751 d->op0 = d->op1;
49752 break;
49754 case 1:
49755 d->op1 = d->op0;
49756 break;
49759 return (which == 3);
49762 bool
49763 ix86_expand_vec_perm_const (rtx operands[4])
49765 struct expand_vec_perm_d d;
49766 unsigned char perm[MAX_VECT_LEN];
49767 int i, nelt;
49768 bool two_args;
49769 rtx sel;
49771 d.target = operands[0];
49772 d.op0 = operands[1];
49773 d.op1 = operands[2];
49774 sel = operands[3];
49776 d.vmode = GET_MODE (d.target);
49777 gcc_assert (VECTOR_MODE_P (d.vmode));
49778 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49779 d.testing_p = false;
49781 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49782 gcc_assert (XVECLEN (sel, 0) == nelt);
49783 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
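/* Reduce each selector element modulo 2*nelt; PERM keeps a copy so the
unflattened indices can be restored if the canonicalized attempt below
fails. */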
49785 for (i = 0; i < nelt; ++i)
49787 rtx e = XVECEXP (sel, 0, i);
49788 int ei = INTVAL (e) & (2 * nelt - 1);
49789 d.perm[i] = ei;
49790 perm[i] = ei;
49793 two_args = canonicalize_perm (&d);
49795 if (ix86_expand_vec_perm_const_1 (&d))
49796 return true;
49798 /* If the selector says both arguments are needed, but the operands are the
49799 same, the above tried to expand with one_operand_p and a flattened selector.
49800 If that didn't work, retry without one_operand_p; we succeeded with that
49801 during testing. */
49802 if (two_args && d.one_operand_p)
49804 d.one_operand_p = false;
49805 memcpy (d.perm, perm, sizeof (perm));
49806 return ix86_expand_vec_perm_const_1 (&d);
49809 return false;
49812 /* Implement targetm.vectorize.vec_perm_const_ok. */
49814 static bool
49815 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49816 const unsigned char *sel)
49818 struct expand_vec_perm_d d;
49819 unsigned int i, nelt, which;
49820 bool ret;
49822 d.vmode = vmode;
49823 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49824 d.testing_p = true;
49826 /* Given sufficient ISA support we can just return true here
49827 for selected vector modes. */
49828 switch (d.vmode)
49830 case V16SFmode:
49831 case V16SImode:
49832 case V8DImode:
49833 case V8DFmode:
49834 if (TARGET_AVX512F)
49835 /* All implementable with a single vpermi2 insn. */
49836 return true;
49837 break;
49838 case V32HImode:
49839 if (TARGET_AVX512BW)
49840 /* All implementable with a single vpermi2 insn. */
49841 return true;
49842 break;
49843 case V64QImode:
49844 if (TARGET_AVX512BW)
49845 /* Implementable with 2 vpermi2, 2 vpshufb and 1 vpor insn. */
49846 return true;
49847 break;
49848 case V8SImode:
49849 case V8SFmode:
49850 case V4DFmode:
49851 case V4DImode:
49852 if (TARGET_AVX512VL)
49853 /* All implementable with a single vpermi2 insn. */
49854 return true;
49855 break;
49856 case V16HImode:
49857 if (TARGET_AVX2)
49858 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49859 return true;
49860 break;
49861 case V32QImode:
49862 if (TARGET_AVX2)
49863 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49864 return true;
49865 break;
49866 case V4SImode:
49867 case V4SFmode:
49868 case V8HImode:
49869 case V16QImode:
49870 /* All implementable with a single vpperm insn. */
49871 if (TARGET_XOP)
49872 return true;
49873 /* All implementable with 2 pshufb + 1 ior. */
49874 if (TARGET_SSSE3)
49875 return true;
49876 break;
49877 case V2DImode:
49878 case V2DFmode:
49879 /* All implementable with shufpd or unpck[lh]pd. */
49880 return true;
49881 default:
49882 return false;
49885 /* Extract the values from the vector CST into the permutation
49886 array in D. */
49887 memcpy (d.perm, sel, nelt);
49888 for (i = which = 0; i < nelt; ++i)
49890 unsigned char e = d.perm[i];
49891 gcc_assert (e < 2 * nelt);
49892 which |= (e < nelt ? 1 : 2);
49895 /* For all elements from the second vector, fold the elements to the first. */
49896 if (which == 2)
49897 for (i = 0; i < nelt; ++i)
49898 d.perm[i] -= nelt;
49900 /* Check whether the mask can be applied to the vector type. */
49901 d.one_operand_p = (which != 3);
49903 /* Implementable with shufps or pshufd. */
49904 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49905 return true;
49907 /* Otherwise we have to go through the motions and see if we can
49908 figure out how to generate the requested permutation. */
49909 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49910 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49911 if (!d.one_operand_p)
49912 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49914 start_sequence ();
49915 ret = ix86_expand_vec_perm_const_1 (&d);
49916 end_sequence ();
49918 return ret;
49921 void
49922 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49924 struct expand_vec_perm_d d;
49925 unsigned i, nelt;
49927 d.target = targ;
49928 d.op0 = op0;
49929 d.op1 = op1;
49930 d.vmode = GET_MODE (targ);
49931 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49932 d.one_operand_p = false;
49933 d.testing_p = false;
49935 for (i = 0; i < nelt; ++i)
49936 d.perm[i] = i * 2 + odd;
49938 /* We'll either be able to implement the permutation directly... */
49939 if (expand_vec_perm_1 (&d))
49940 return;
49942 /* ... or we use the special-case patterns. */
49943 expand_vec_perm_even_odd_1 (&d, odd);
49946 static void
49947 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49949 struct expand_vec_perm_d d;
49950 unsigned i, nelt, base;
49951 bool ok;
49953 d.target = targ;
49954 d.op0 = op0;
49955 d.op1 = op1;
49956 d.vmode = GET_MODE (targ);
49957 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49958 d.one_operand_p = false;
49959 d.testing_p = false;
49961 base = high_p ? nelt / 2 : 0;
49962 for (i = 0; i < nelt / 2; ++i)
49964 d.perm[i * 2] = i + base;
49965 d.perm[i * 2 + 1] = i + base + nelt;
49968 /* Note that for AVX this isn't one instruction. */
49969 ok = ix86_expand_vec_perm_const_1 (&d);
49970 gcc_assert (ok);
49974 /* Expand a vector operation CODE for a V*QImode in terms of the
49975 same operation on V*HImode. */
49977 void
49978 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49980 machine_mode qimode = GET_MODE (dest);
49981 machine_mode himode;
49982 rtx (*gen_il) (rtx, rtx, rtx);
49983 rtx (*gen_ih) (rtx, rtx, rtx);
49984 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49985 struct expand_vec_perm_d d;
49986 bool ok, full_interleave;
49987 bool uns_p = false;
49988 int i;
49990 switch (qimode)
49992 case V16QImode:
49993 himode = V8HImode;
49994 gen_il = gen_vec_interleave_lowv16qi;
49995 gen_ih = gen_vec_interleave_highv16qi;
49996 break;
49997 case V32QImode:
49998 himode = V16HImode;
49999 gen_il = gen_avx2_interleave_lowv32qi;
50000 gen_ih = gen_avx2_interleave_highv32qi;
50001 break;
50002 case V64QImode:
50003 himode = V32HImode;
50004 gen_il = gen_avx512bw_interleave_lowv64qi;
50005 gen_ih = gen_avx512bw_interleave_highv64qi;
50006 break;
50007 default:
50008 gcc_unreachable ();
50011 op2_l = op2_h = op2;
50012 switch (code)
50014 case MULT:
50015 /* Unpack data such that we've got a source byte in each low byte of
50016 each word. We don't care what goes into the high byte of each word.
50017 Rather than trying to get zero in there, the most convenient thing
50018 is to let it be a copy of the low byte. */
50019 op2_l = gen_reg_rtx (qimode);
50020 op2_h = gen_reg_rtx (qimode);
50021 emit_insn (gen_il (op2_l, op2, op2));
50022 emit_insn (gen_ih (op2_h, op2, op2));
50023 /* FALLTHRU */
50025 op1_l = gen_reg_rtx (qimode);
50026 op1_h = gen_reg_rtx (qimode);
50027 emit_insn (gen_il (op1_l, op1, op1));
50028 emit_insn (gen_ih (op1_h, op1, op1));
50029 full_interleave = qimode == V16QImode;
50030 break;
50032 case ASHIFT:
50033 case LSHIFTRT:
50034 uns_p = true;
50035 /* FALLTHRU */
50036 case ASHIFTRT:
50037 op1_l = gen_reg_rtx (himode);
50038 op1_h = gen_reg_rtx (himode);
50039 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
50040 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
50041 full_interleave = true;
50042 break;
50043 default:
50044 gcc_unreachable ();
50047 /* Perform the operation. */
50048 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
50049 1, OPTAB_DIRECT);
50050 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
50051 1, OPTAB_DIRECT);
50052 gcc_assert (res_l && res_h);
50054 /* Merge the data back into the right place. */
50055 d.target = dest;
50056 d.op0 = gen_lowpart (qimode, res_l);
50057 d.op1 = gen_lowpart (qimode, res_h);
50058 d.vmode = qimode;
50059 d.nelt = GET_MODE_NUNITS (qimode);
50060 d.one_operand_p = false;
50061 d.testing_p = false;
50063 if (full_interleave)
50065 /* For SSE2, we used a full interleave, so the desired
50066 results are in the even elements. */
50067 for (i = 0; i < 64; ++i)
50068 d.perm[i] = i * 2;
50070 else
50072 /* For AVX, the interleave used above was not cross-lane. So the
50073 extraction is of the even elements, but with the second and third
50074 quarters swapped. Happily, that is even one insn shorter than even extraction. */
50075 for (i = 0; i < 64; ++i)
50076 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
50079 ok = ix86_expand_vec_perm_const_1 (&d);
50080 gcc_assert (ok);
50082 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50083 gen_rtx_fmt_ee (code, qimode, op1, op2));
50086 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
50087 if op is CONST_VECTOR with all odd elements equal to their
50088 preceding element. */
50090 static bool
50091 const_vector_equal_evenodd_p (rtx op)
50093 machine_mode mode = GET_MODE (op);
50094 int i, nunits = GET_MODE_NUNITS (mode);
50095 if (GET_CODE (op) != CONST_VECTOR
50096 || nunits != CONST_VECTOR_NUNITS (op))
50097 return false;
50098 for (i = 0; i < nunits; i += 2)
50099 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
50100 return false;
50101 return true;
50104 void
50105 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
50106 bool uns_p, bool odd_p)
50108 machine_mode mode = GET_MODE (op1);
50109 machine_mode wmode = GET_MODE (dest);
50110 rtx x;
50111 rtx orig_op1 = op1, orig_op2 = op2;
50113 if (!nonimmediate_operand (op1, mode))
50114 op1 = force_reg (mode, op1);
50115 if (!nonimmediate_operand (op2, mode))
50116 op2 = force_reg (mode, op2);
50118 /* We only play even/odd games with vectors of SImode. */
50119 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
50121 /* If we're looking for the odd results, shift those members down to
50122 the even slots. For some cpus this is faster than a PSHUFD. */
50123 if (odd_p)
50125 /* For XOP use vpmacsdqh, but only for smult, as it is only
50126 signed. */
50127 if (TARGET_XOP && mode == V4SImode && !uns_p)
50129 x = force_reg (wmode, CONST0_RTX (wmode));
50130 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
50131 return;
50134 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
50135 if (!const_vector_equal_evenodd_p (orig_op1))
50136 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
50137 x, NULL, 1, OPTAB_DIRECT);
50138 if (!const_vector_equal_evenodd_p (orig_op2))
50139 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
50140 x, NULL, 1, OPTAB_DIRECT);
50141 op1 = gen_lowpart (mode, op1);
50142 op2 = gen_lowpart (mode, op2);
50145 if (mode == V16SImode)
50147 if (uns_p)
50148 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
50149 else
50150 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
50152 else if (mode == V8SImode)
50154 if (uns_p)
50155 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
50156 else
50157 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
50159 else if (uns_p)
50160 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
50161 else if (TARGET_SSE4_1)
50162 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
50163 else
50165 rtx s1, s2, t0, t1, t2;
50167 /* The easiest way to implement this without PMULDQ is to go through
50168 the motions as if we are performing a full 64-bit multiply, except
50169 that we need to do less shuffling of the elements. */
50171 /* Compute the sign-extension, aka highparts, of the two operands. */
50172 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50173 op1, pc_rtx, pc_rtx);
50174 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50175 op2, pc_rtx, pc_rtx);
50177 /* Multiply LO(A) * HI(B), and vice-versa. */
50178 t1 = gen_reg_rtx (wmode);
50179 t2 = gen_reg_rtx (wmode);
50180 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
50181 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
50183 /* Multiply LO(A) * LO(B). */
50184 t0 = gen_reg_rtx (wmode);
50185 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
50187 /* Combine and shift the highparts into place. */
50188 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
50189 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
50190 1, OPTAB_DIRECT);
50192 /* Combine high and low parts. */
50193 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
50194 return;
50196 emit_insn (x);
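/* The no-PMULDQ fallback above is the usual construction of a signed
   32x32->64 product from unsigned 32x32->64 multiplies.  A rough scalar
   model, assuming <stdint.h> types (illustrative sketch only):

     int64_t smul_widen (int32_t a, int32_t b)
     {
       uint64_t s1 = a < 0 ? 0xffffffffu : 0;   // sign mask of A (the GT compare)
       uint64_t s2 = b < 0 ? 0xffffffffu : 0;   // sign mask of B
       uint64_t t1 = s1 * (uint32_t) b;         // HI(A) * LO(B)
       uint64_t t2 = s2 * (uint32_t) a;         // HI(B) * LO(A)
       uint64_t t0 = (uint64_t) (uint32_t) a * (uint32_t) b;  // LO(A) * LO(B)
       return (int64_t) (t0 + ((t1 + t2) << 32));  // highparts shifted into place
     }  */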
50199 void
50200 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
50201 bool uns_p, bool high_p)
50203 machine_mode wmode = GET_MODE (dest);
50204 machine_mode mode = GET_MODE (op1);
50205 rtx t1, t2, t3, t4, mask;
50207 switch (mode)
50209 case V4SImode:
50210 t1 = gen_reg_rtx (mode);
50211 t2 = gen_reg_rtx (mode);
50212 if (TARGET_XOP && !uns_p)
50214 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
50215 shuffle the elements once so that all elements are in the right
50216 place for immediate use: { A C B D }. */
50217 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
50218 const1_rtx, GEN_INT (3)));
50219 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
50220 const1_rtx, GEN_INT (3)));
50222 else
50224 /* Put the elements into place for the multiply. */
50225 ix86_expand_vec_interleave (t1, op1, op1, high_p);
50226 ix86_expand_vec_interleave (t2, op2, op2, high_p);
50227 high_p = false;
50229 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
50230 break;
50232 case V8SImode:
50233 /* Shuffle the elements between the lanes. After this we
50234 have { A B E F | C D G H } for each operand. */
50235 t1 = gen_reg_rtx (V4DImode);
50236 t2 = gen_reg_rtx (V4DImode);
50237 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
50238 const0_rtx, const2_rtx,
50239 const1_rtx, GEN_INT (3)));
50240 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
50241 const0_rtx, const2_rtx,
50242 const1_rtx, GEN_INT (3)));
50244 /* Shuffle the elements within the lanes. After this we
50245 have { A A B B | C C D D } or { E E F F | G G H H }. */
50246 t3 = gen_reg_rtx (V8SImode);
50247 t4 = gen_reg_rtx (V8SImode);
50248 mask = GEN_INT (high_p
50249 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
50250 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
50251 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
50252 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
50254 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
50255 break;
50257 case V8HImode:
50258 case V16HImode:
50259 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
50260 uns_p, OPTAB_DIRECT);
50261 t2 = expand_binop (mode,
50262 uns_p ? umul_highpart_optab : smul_highpart_optab,
50263 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
50264 gcc_assert (t1 && t2);
50266 t3 = gen_reg_rtx (mode);
50267 ix86_expand_vec_interleave (t3, t1, t2, high_p);
50268 emit_move_insn (dest, gen_lowpart (wmode, t3));
50269 break;
50271 case V16QImode:
50272 case V32QImode:
50273 case V32HImode:
50274 case V16SImode:
50275 case V64QImode:
50276 t1 = gen_reg_rtx (wmode);
50277 t2 = gen_reg_rtx (wmode);
50278 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
50279 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
50281 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
50282 break;
50284 default:
50285 gcc_unreachable ();
50289 void
50290 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
50292 rtx res_1, res_2, res_3, res_4;
50294 res_1 = gen_reg_rtx (V4SImode);
50295 res_2 = gen_reg_rtx (V4SImode);
50296 res_3 = gen_reg_rtx (V2DImode);
50297 res_4 = gen_reg_rtx (V2DImode);
50298 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50299 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50301 /* Move the results in element 2 down to element 1; we don't care
50302 what goes in elements 2 and 3. Then we can merge the parts
50303 back together with an interleave.
50305 Note that two other sequences were tried:
50306 (1) Use interleaves at the start instead of psrldq, which allows
50307 us to use a single shufps to merge things back at the end.
50308 (2) Use shufps here to combine the two vectors, then pshufd to
50309 put the elements in the correct order.
50310 In both cases the cost of the reformatting stall was too high
50311 and the overall sequence slower. */
50313 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50314 const0_rtx, const2_rtx,
50315 const0_rtx, const0_rtx));
50316 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50317 const0_rtx, const2_rtx,
50318 const0_rtx, const0_rtx));
50319 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50321 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
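/* Element flow of the sequence above for one V4SI multiply (illustrative):
   with op1 = { a0 a1 a2 a3 } and op2 = { b0 b1 b2 b3 } the even/odd
   widening multiplies give

     res_3 = { a0*b0, a2*b2 }     (two 64-bit lanes)
     res_4 = { a1*b1, a3*b3 }

   The two pshufd insns move the low 32 bits of each product down into
   elements 0 and 1, and the final interleave-low merges them back as

     op0 = { lo(a0*b0), lo(a1*b1), lo(a2*b2), lo(a3*b3) }.  */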
50324 void
50325 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50327 machine_mode mode = GET_MODE (op0);
50328 rtx t1, t2, t3, t4, t5, t6;
50330 if (TARGET_AVX512DQ && mode == V8DImode)
50331 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50332 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50333 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50334 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50335 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50336 else if (TARGET_XOP && mode == V2DImode)
50338 /* op1: A,B,C,D, op2: E,F,G,H */
50339 op1 = gen_lowpart (V4SImode, op1);
50340 op2 = gen_lowpart (V4SImode, op2);
50342 t1 = gen_reg_rtx (V4SImode);
50343 t2 = gen_reg_rtx (V4SImode);
50344 t3 = gen_reg_rtx (V2DImode);
50345 t4 = gen_reg_rtx (V2DImode);
50347 /* t1: B,A,D,C */
50348 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50349 GEN_INT (1),
50350 GEN_INT (0),
50351 GEN_INT (3),
50352 GEN_INT (2)));
50354 /* t2: (B*E),(A*F),(D*G),(C*H) */
50355 emit_insn (gen_mulv4si3 (t2, t1, op2));
50357 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50358 emit_insn (gen_xop_phadddq (t3, t2));
50360 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50361 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50363 /* Multiply the lower parts and add everything together. */
50364 t5 = gen_reg_rtx (V2DImode);
50365 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50366 gen_lowpart (V4SImode, op1),
50367 gen_lowpart (V4SImode, op2)));
50368 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50371 else
50373 machine_mode nmode;
50374 rtx (*umul) (rtx, rtx, rtx);
50376 if (mode == V2DImode)
50378 umul = gen_vec_widen_umult_even_v4si;
50379 nmode = V4SImode;
50381 else if (mode == V4DImode)
50383 umul = gen_vec_widen_umult_even_v8si;
50384 nmode = V8SImode;
50386 else if (mode == V8DImode)
50388 umul = gen_vec_widen_umult_even_v16si;
50389 nmode = V16SImode;
50391 else
50392 gcc_unreachable ();
50395 /* Multiply low parts. */
50396 t1 = gen_reg_rtx (mode);
50397 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50399 /* Shift input vectors right 32 bits so we can multiply high parts. */
50400 t6 = GEN_INT (32);
50401 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50402 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50404 /* Multiply high parts by low parts. */
50405 t4 = gen_reg_rtx (mode);
50406 t5 = gen_reg_rtx (mode);
50407 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50408 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50410 /* Combine and shift the highparts back. */
50411 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50412 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50414 /* Combine high and low parts. */
50415 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50418 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50419 gen_rtx_MULT (mode, op1, op2));
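/* The generic path above is the textbook split of a 64x64->64 multiply
   into unsigned 32x32->64 multiplies; the hi*hi product only affects bits
   above 63 and is dropped.  A rough scalar model, assuming <stdint.h>
   types (illustrative sketch only):

     uint64_t mul64 (uint64_t a, uint64_t b)
     {
       uint64_t lo    = (uint64_t) (uint32_t) a * (uint32_t) b;           // t1
       uint64_t cross = (uint64_t) (uint32_t) (a >> 32) * (uint32_t) b    // t4
                        + (uint64_t) (uint32_t) (b >> 32) * (uint32_t) a; // t5
       return lo + (cross << 32);
     }  */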
50422 /* Return 1 if control transfer instruction INSN
50423 should be encoded with bnd prefix.
50424 If insn is NULL then return 1 when control
50425 transfer instructions should be prefixed with
50426 bnd by default for current function. */
50428 bool
50429 ix86_bnd_prefixed_insn_p (rtx insn)
50431 /* For call insns check special flag. */
50432 if (insn && CALL_P (insn))
50434 rtx call = get_call_rtx_from (insn);
50435 if (call)
50436 return CALL_EXPR_WITH_BOUNDS_P (call);
50439 /* All other insns are prefixed only if function is instrumented. */
50440 return chkp_function_instrumented_p (current_function_decl);
50443 /* Calculate integer abs() using only SSE2 instructions. */
50445 void
50446 ix86_expand_sse2_abs (rtx target, rtx input)
50448 machine_mode mode = GET_MODE (target);
50449 rtx tmp0, tmp1, x;
50451 switch (mode)
50453 /* For 32-bit signed integer X, the best way to calculate the absolute
50454 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
50455 case V4SImode:
50456 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50457 GEN_INT (GET_MODE_BITSIZE
50458 (GET_MODE_INNER (mode)) - 1),
50459 NULL, 0, OPTAB_DIRECT);
50460 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50461 NULL, 0, OPTAB_DIRECT);
50462 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50463 target, 0, OPTAB_DIRECT);
50464 break;
50466 /* For 16-bit signed integer X, the best way to calculate the absolute
50467 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50468 case V8HImode:
50469 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50471 x = expand_simple_binop (mode, SMAX, tmp0, input,
50472 target, 0, OPTAB_DIRECT);
50473 break;
50475 /* For 8-bit signed integer X, the best way to calculate the absolute
50476 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50477 as SSE2 provides the PMINUB insn. */
50478 case V16QImode:
50479 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50481 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50482 target, 0, OPTAB_DIRECT);
50483 break;
50485 default:
50486 gcc_unreachable ();
50489 if (x != target)
50490 emit_move_insn (target, x);
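/* Scalar versions of the three identities used above, assuming <stdint.h>
   types (illustrative sketch; like the SSE sequences, these wrap for the
   most negative input):

     int32_t abs32 (int32_t x)      // V4SImode path
     {
       int32_t s = x >> 31;         // arithmetic shift: 0 or -1
       return (s ^ x) - s;
     }
     int16_t abs16 (int16_t x)      // V8HImode path, i.e. PMAXSW
     {
       int16_t n = (int16_t) -x;
       return x > n ? x : n;
     }
     uint8_t abs8 (int8_t x)        // V16QImode path, i.e. PMINUB
     {
       uint8_t ux = (uint8_t) x, nx = (uint8_t) -x;
       return ux < nx ? ux : nx;
     }  */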
50493 /* Expand an insert into a vector register through pinsr insn.
50494 Return true if successful. */
50496 bool
50497 ix86_expand_pinsr (rtx *operands)
50499 rtx dst = operands[0];
50500 rtx src = operands[3];
50502 unsigned int size = INTVAL (operands[1]);
50503 unsigned int pos = INTVAL (operands[2]);
50505 if (GET_CODE (dst) == SUBREG)
50507 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50508 dst = SUBREG_REG (dst);
50511 if (GET_CODE (src) == SUBREG)
50512 src = SUBREG_REG (src);
50514 switch (GET_MODE (dst))
50516 case V16QImode:
50517 case V8HImode:
50518 case V4SImode:
50519 case V2DImode:
50521 machine_mode srcmode, dstmode;
50522 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50524 srcmode = mode_for_size (size, MODE_INT, 0);
50526 switch (srcmode)
50528 case QImode:
50529 if (!TARGET_SSE4_1)
50530 return false;
50531 dstmode = V16QImode;
50532 pinsr = gen_sse4_1_pinsrb;
50533 break;
50535 case HImode:
50536 if (!TARGET_SSE2)
50537 return false;
50538 dstmode = V8HImode;
50539 pinsr = gen_sse2_pinsrw;
50540 break;
50542 case SImode:
50543 if (!TARGET_SSE4_1)
50544 return false;
50545 dstmode = V4SImode;
50546 pinsr = gen_sse4_1_pinsrd;
50547 break;
50549 case DImode:
50550 gcc_assert (TARGET_64BIT);
50551 if (!TARGET_SSE4_1)
50552 return false;
50553 dstmode = V2DImode;
50554 pinsr = gen_sse4_1_pinsrq;
50555 break;
50557 default:
50558 return false;
50561 rtx d = dst;
50562 if (GET_MODE (dst) != dstmode)
50563 d = gen_reg_rtx (dstmode);
50564 src = gen_lowpart (srcmode, src);
50566 pos /= size;
50568 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50569 GEN_INT (1 << pos)));
50570 if (d != dst)
50571 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50572 return true;
50575 default:
50576 return false;
50580 /* This function returns the calling abi specific va_list type node.
50581 It returns the FNDECL specific va_list type. */
50583 static tree
50584 ix86_fn_abi_va_list (tree fndecl)
50586 if (!TARGET_64BIT)
50587 return va_list_type_node;
50588 gcc_assert (fndecl != NULL_TREE);
50590 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50591 return ms_va_list_type_node;
50592 else
50593 return sysv_va_list_type_node;
50596 /* Returns the canonical va_list type specified by TYPE. If there
50597 is no valid TYPE provided, it returns NULL_TREE. */
50599 static tree
50600 ix86_canonical_va_list_type (tree type)
50602 tree wtype, htype;
50604 /* Resolve references and pointers to va_list type. */
50605 if (TREE_CODE (type) == MEM_REF)
50606 type = TREE_TYPE (type);
50607 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
50608 type = TREE_TYPE (type);
50609 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50610 type = TREE_TYPE (type);
50612 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50614 wtype = va_list_type_node;
50615 gcc_assert (wtype != NULL_TREE);
50616 htype = type;
50617 if (TREE_CODE (wtype) == ARRAY_TYPE)
50619 /* If va_list is an array type, the argument may have decayed
50620 to a pointer type, e.g. by being passed to another function.
50621 In that case, unwrap both types so that we can compare the
50622 underlying records. */
50623 if (TREE_CODE (htype) == ARRAY_TYPE
50624 || POINTER_TYPE_P (htype))
50626 wtype = TREE_TYPE (wtype);
50627 htype = TREE_TYPE (htype);
50630 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50631 return va_list_type_node;
50632 wtype = sysv_va_list_type_node;
50633 gcc_assert (wtype != NULL_TREE);
50634 htype = type;
50635 if (TREE_CODE (wtype) == ARRAY_TYPE)
50637 /* If va_list is an array type, the argument may have decayed
50638 to a pointer type, e.g. by being passed to another function.
50639 In that case, unwrap both types so that we can compare the
50640 underlying records. */
50641 if (TREE_CODE (htype) == ARRAY_TYPE
50642 || POINTER_TYPE_P (htype))
50644 wtype = TREE_TYPE (wtype);
50645 htype = TREE_TYPE (htype);
50648 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50649 return sysv_va_list_type_node;
50650 wtype = ms_va_list_type_node;
50651 gcc_assert (wtype != NULL_TREE);
50652 htype = type;
50653 if (TREE_CODE (wtype) == ARRAY_TYPE)
50655 /* If va_list is an array type, the argument may have decayed
50656 to a pointer type, e.g. by being passed to another function.
50657 In that case, unwrap both types so that we can compare the
50658 underlying records. */
50659 if (TREE_CODE (htype) == ARRAY_TYPE
50660 || POINTER_TYPE_P (htype))
50662 wtype = TREE_TYPE (wtype);
50663 htype = TREE_TYPE (htype);
50666 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50667 return ms_va_list_type_node;
50668 return NULL_TREE;
50670 return std_canonical_va_list_type (type);
50673 /* Iterate through the target-specific builtin types for va_list.
50674 IDX denotes the iterator, *PTREE is set to the result type of
50675 the va_list builtin, and *PNAME to its internal type.
50676 Returns zero if there is no element for this index, otherwise
50677 IDX should be increased upon the next call.
50678 Note, do not iterate a base builtin's name like __builtin_va_list.
50679 Used from c_common_nodes_and_builtins. */
50681 static int
50682 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50684 if (TARGET_64BIT)
50686 switch (idx)
50688 default:
50689 break;
50691 case 0:
50692 *ptree = ms_va_list_type_node;
50693 *pname = "__builtin_ms_va_list";
50694 return 1;
50696 case 1:
50697 *ptree = sysv_va_list_type_node;
50698 *pname = "__builtin_sysv_va_list";
50699 return 1;
50703 return 0;
50706 #undef TARGET_SCHED_DISPATCH
50707 #define TARGET_SCHED_DISPATCH has_dispatch
50708 #undef TARGET_SCHED_DISPATCH_DO
50709 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50710 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50711 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50712 #undef TARGET_SCHED_REORDER
50713 #define TARGET_SCHED_REORDER ix86_sched_reorder
50714 #undef TARGET_SCHED_ADJUST_PRIORITY
50715 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50716 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50717 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50718 ix86_dependencies_evaluation_hook
50720 /* The size of the dispatch window is the total number of bytes of
50721 object code allowed in a window. */
50722 #define DISPATCH_WINDOW_SIZE 16
50724 /* Number of dispatch windows considered for scheduling. */
50725 #define MAX_DISPATCH_WINDOWS 3
50727 /* Maximum number of instructions in a window. */
50728 #define MAX_INSN 4
50730 /* Maximum number of immediate operands in a window. */
50731 #define MAX_IMM 4
50733 /* Maximum number of immediate bits allowed in a window. */
50734 #define MAX_IMM_SIZE 128
50736 /* Maximum number of 32 bit immediates allowed in a window. */
50737 #define MAX_IMM_32 4
50739 /* Maximum number of 64 bit immediates allowed in a window. */
50740 #define MAX_IMM_64 2
50742 /* Maximum total of loads or prefetches allowed in a window. */
50743 #define MAX_LOAD 2
50745 /* Maximum total of stores allowed in a window. */
50746 #define MAX_STORE 1
50748 #undef BIG
50749 #define BIG 100
50752 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50753 enum dispatch_group {
50754 disp_no_group = 0,
50755 disp_load,
50756 disp_store,
50757 disp_load_store,
50758 disp_prefetch,
50759 disp_imm,
50760 disp_imm_32,
50761 disp_imm_64,
50762 disp_branch,
50763 disp_cmp,
50764 disp_jcc,
50765 disp_last
50768 /* Number of allowable groups in a dispatch window. It is an array
50769 indexed by dispatch_group enum. 100 is used as a big number,
50770 because the number of these kinds of operations does not have any
50771 effect on the dispatch window, but we need them for other reasons in
50772 the table. */
50773 static unsigned int num_allowable_groups[disp_last] = {
50774 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50777 char group_name[disp_last + 1][16] = {
50778 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50779 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50780 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50783 /* Instruction path. */
50784 enum insn_path {
50785 no_path = 0,
50786 path_single, /* Single micro op. */
50787 path_double, /* Double micro op. */
50788 path_multi, /* Instructions with more than 2 micro ops. */
50789 last_path
50792 /* sched_insn_info defines a window to the instructions scheduled in
50793 the basic block. It contains a pointer to the insn_info table and
50794 the instruction scheduled.
50796 Windows are allocated for each basic block and are linked
50797 together. */
50798 typedef struct sched_insn_info_s {
50799 rtx insn;
50800 enum dispatch_group group;
50801 enum insn_path path;
50802 int byte_len;
50803 int imm_bytes;
50804 } sched_insn_info;
50806 /* Linked list of dispatch windows. This is a two way list of
50807 dispatch windows of a basic block. It contains information about
50808 the number of uops in the window and the total number of
50809 instructions and of bytes in the object code for this dispatch
50810 window. */
50811 typedef struct dispatch_windows_s {
50812 int num_insn; /* Number of insn in the window. */
50813 int num_uops; /* Number of uops in the window. */
50814 int window_size; /* Number of bytes in the window. */
50815 int window_num; /* Window number, either 0 or 1. */
50816 int num_imm; /* Number of immediates in an insn. */
50817 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50818 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50819 int imm_size; /* Total immediates in the window. */
50820 int num_loads; /* Total memory loads in the window. */
50821 int num_stores; /* Total memory stores in the window. */
50822 int violation; /* Violation exists in window. */
50823 sched_insn_info *window; /* Pointer to the window. */
50824 struct dispatch_windows_s *next;
50825 struct dispatch_windows_s *prev;
50826 } dispatch_windows;
50828 /* Immediate values used in an insn. */
50829 typedef struct imm_info_s
50831 int imm;
50832 int imm32;
50833 int imm64;
50834 } imm_info;
50836 static dispatch_windows *dispatch_window_list;
50837 static dispatch_windows *dispatch_window_list1;
50839 /* Get dispatch group of insn. */
50841 static enum dispatch_group
50842 get_mem_group (rtx_insn *insn)
50844 enum attr_memory memory;
50846 if (INSN_CODE (insn) < 0)
50847 return disp_no_group;
50848 memory = get_attr_memory (insn);
50849 if (memory == MEMORY_STORE)
50850 return disp_store;
50852 if (memory == MEMORY_LOAD)
50853 return disp_load;
50855 if (memory == MEMORY_BOTH)
50856 return disp_load_store;
50858 return disp_no_group;
50861 /* Return true if insn is a compare instruction. */
50863 static bool
50864 is_cmp (rtx_insn *insn)
50866 enum attr_type type;
50868 type = get_attr_type (insn);
50869 return (type == TYPE_TEST
50870 || type == TYPE_ICMP
50871 || type == TYPE_FCMP
50872 || GET_CODE (PATTERN (insn)) == COMPARE);
50875 /* Return true if a dispatch violation was encountered. */
50877 static bool
50878 dispatch_violation (void)
50880 if (dispatch_window_list->next)
50881 return dispatch_window_list->next->violation;
50882 return dispatch_window_list->violation;
50885 /* Return true if insn is a branch instruction. */
50887 static bool
50888 is_branch (rtx_insn *insn)
50890 return (CALL_P (insn) || JUMP_P (insn));
50893 /* Return true if insn is a prefetch instruction. */
50895 static bool
50896 is_prefetch (rtx_insn *insn)
50898 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50901 /* This function initializes a dispatch window and the list container holding a
50902 pointer to the window. */
50904 static void
50905 init_window (int window_num)
50907 int i;
50908 dispatch_windows *new_list;
50910 if (window_num == 0)
50911 new_list = dispatch_window_list;
50912 else
50913 new_list = dispatch_window_list1;
50915 new_list->num_insn = 0;
50916 new_list->num_uops = 0;
50917 new_list->window_size = 0;
50918 new_list->next = NULL;
50919 new_list->prev = NULL;
50920 new_list->window_num = window_num;
50921 new_list->num_imm = 0;
50922 new_list->num_imm_32 = 0;
50923 new_list->num_imm_64 = 0;
50924 new_list->imm_size = 0;
50925 new_list->num_loads = 0;
50926 new_list->num_stores = 0;
50927 new_list->violation = false;
50929 for (i = 0; i < MAX_INSN; i++)
50931 new_list->window[i].insn = NULL;
50932 new_list->window[i].group = disp_no_group;
50933 new_list->window[i].path = no_path;
50934 new_list->window[i].byte_len = 0;
50935 new_list->window[i].imm_bytes = 0;
50937 return;
50940 /* This function allocates and initializes a dispatch window and the
50941 list container holding a pointer to the window. */
50943 static dispatch_windows *
50944 allocate_window (void)
50946 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50947 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50949 return new_list;
50952 /* This routine initializes the dispatch scheduling information. It
50953 initiates building dispatch scheduler tables and constructs the
50954 first dispatch window. */
50956 static void
50957 init_dispatch_sched (void)
50959 /* Allocate a dispatch list and a window. */
50960 dispatch_window_list = allocate_window ();
50961 dispatch_window_list1 = allocate_window ();
50962 init_window (0);
50963 init_window (1);
50966 /* This function returns true if a branch is detected. End of a basic block
50967 does not have to be a branch, but here we assume only branches end a
50968 window. */
50970 static bool
50971 is_end_basic_block (enum dispatch_group group)
50973 return group == disp_branch;
50976 /* This function is called when the end of a window processing is reached. */
50978 static void
50979 process_end_window (void)
50981 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50982 if (dispatch_window_list->next)
50984 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50985 gcc_assert (dispatch_window_list->window_size
50986 + dispatch_window_list1->window_size <= 48);
50987 init_window (1);
50989 init_window (0);
50992 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50993 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50994 for 48 bytes of instructions. Note that these windows are not dispatch
50995 windows of size DISPATCH_WINDOW_SIZE. */
50997 static dispatch_windows *
50998 allocate_next_window (int window_num)
51000 if (window_num == 0)
51002 if (dispatch_window_list->next)
51003 init_window (1);
51004 init_window (0);
51005 return dispatch_window_list;
51008 dispatch_window_list->next = dispatch_window_list1;
51009 dispatch_window_list1->prev = dispatch_window_list;
51011 return dispatch_window_list1;
51014 /* Compute number of immediate operands of an instruction. */
51016 static void
51017 find_constant (rtx in_rtx, imm_info *imm_values)
51019 if (INSN_P (in_rtx))
51020 in_rtx = PATTERN (in_rtx);
51021 subrtx_iterator::array_type array;
51022 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
51023 if (const_rtx x = *iter)
51024 switch (GET_CODE (x))
51026 case CONST:
51027 case SYMBOL_REF:
51028 case CONST_INT:
51029 (imm_values->imm)++;
51030 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
51031 (imm_values->imm32)++;
51032 else
51033 (imm_values->imm64)++;
51034 break;
51036 case CONST_DOUBLE:
51037 case CONST_WIDE_INT:
51038 (imm_values->imm)++;
51039 (imm_values->imm64)++;
51040 break;
51042 case CODE_LABEL:
51043 if (LABEL_KIND (x) == LABEL_NORMAL)
51045 (imm_values->imm)++;
51046 (imm_values->imm32)++;
51048 break;
51050 default:
51051 break;
51055 /* Return total size of immediate operands of an instruction along with number
51056 of corresponding immediate operands. It initializes its parameters to zero
51057 before calling FIND_CONSTANT.
51058 INSN is the input instruction. IMM is the total of immediates.
51059 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
51060 bit immediates. */
51062 static int
51063 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
51065 imm_info imm_values = {0, 0, 0};
51067 find_constant (insn, &imm_values);
51068 *imm = imm_values.imm;
51069 *imm32 = imm_values.imm32;
51070 *imm64 = imm_values.imm64;
51071 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
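/* For example (illustrative): for an insn like movl $100, %eax the walk
   finds one CONST_INT that fits a sign-extended 32-bit immediate, so
   *IMM = 1, *IMM32 = 1, *IMM64 = 0 and the returned size is 4 bytes; a
   movabsq whose constant does not fit in 32 bits counts as *IMM64 = 1 and
   contributes 8 bytes instead.  */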
51074 /* This function indicates whether an instruction has an immediate
51075 operand. */
51077 static bool
51078 has_immediate (rtx_insn *insn)
51080 int num_imm_operand;
51081 int num_imm32_operand;
51082 int num_imm64_operand;
51084 if (insn)
51085 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51086 &num_imm64_operand);
51087 return false;
51090 /* Return single or double path for instructions. */
51092 static enum insn_path
51093 get_insn_path (rtx_insn *insn)
51095 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
51097 if ((int)path == 0)
51098 return path_single;
51100 if ((int)path == 1)
51101 return path_double;
51103 return path_multi;
51106 /* Return insn dispatch group. */
51108 static enum dispatch_group
51109 get_insn_group (rtx_insn *insn)
51111 enum dispatch_group group = get_mem_group (insn);
51112 if (group)
51113 return group;
51115 if (is_branch (insn))
51116 return disp_branch;
51118 if (is_cmp (insn))
51119 return disp_cmp;
51121 if (has_immediate (insn))
51122 return disp_imm;
51124 if (is_prefetch (insn))
51125 return disp_prefetch;
51127 return disp_no_group;
51130 /* Count number of GROUP restricted instructions in a dispatch
51131 window WINDOW_LIST. */
51133 static int
51134 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
51136 enum dispatch_group group = get_insn_group (insn);
51137 int imm_size;
51138 int num_imm_operand;
51139 int num_imm32_operand;
51140 int num_imm64_operand;
51142 if (group == disp_no_group)
51143 return 0;
51145 if (group == disp_imm)
51147 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51148 &num_imm64_operand);
51149 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
51150 || num_imm_operand + window_list->num_imm > MAX_IMM
51151 || (num_imm32_operand > 0
51152 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
51153 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
51154 || (num_imm64_operand > 0
51155 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
51156 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
51157 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
51158 && num_imm64_operand > 0
51159 && ((window_list->num_imm_64 > 0
51160 && window_list->num_insn >= 2)
51161 || window_list->num_insn >= 3)))
51162 return BIG;
51164 return 1;
51167 if ((group == disp_load_store
51168 && (window_list->num_loads >= MAX_LOAD
51169 || window_list->num_stores >= MAX_STORE))
51170 || ((group == disp_load
51171 || group == disp_prefetch)
51172 && window_list->num_loads >= MAX_LOAD)
51173 || (group == disp_store
51174 && window_list->num_stores >= MAX_STORE))
51175 return BIG;
51177 return 1;
51180 /* This function returns true if insn satisfies dispatch rules on the
51181 last window scheduled. */
51183 static bool
51184 fits_dispatch_window (rtx_insn *insn)
51186 dispatch_windows *window_list = dispatch_window_list;
51187 dispatch_windows *window_list_next = dispatch_window_list->next;
51188 unsigned int num_restrict;
51189 enum dispatch_group group = get_insn_group (insn);
51190 enum insn_path path = get_insn_path (insn);
51191 int sum;
51193 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
51194 instructions should be given the lowest priority in the
51195 scheduling process in Haifa scheduler to make sure they will be
51196 scheduled in the same dispatch window as the reference to them. */
51197 if (group == disp_jcc || group == disp_cmp)
51198 return false;
51200 /* Check nonrestricted. */
51201 if (group == disp_no_group || group == disp_branch)
51202 return true;
51204 /* Get last dispatch window. */
51205 if (window_list_next)
51206 window_list = window_list_next;
51208 if (window_list->window_num == 1)
51210 sum = window_list->prev->window_size + window_list->window_size;
51212 if (sum == 32
51213 || (min_insn_size (insn) + sum) >= 48)
51214 /* Window 1 is full. Go for next window. */
51215 return true;
51218 num_restrict = count_num_restricted (insn, window_list);
51220 if (num_restrict > num_allowable_groups[group])
51221 return false;
51223 /* See if it fits in the first window. */
51224 if (window_list->window_num == 0)
51226 /* The first window should have only single and double path
51227 uops. */
51228 if (path == path_double
51229 && (window_list->num_uops + 2) > MAX_INSN)
51230 return false;
51231 else if (path != path_single)
51232 return false;
51234 return true;
51237 /* Add an instruction INSN with NUM_UOPS micro-operations to the
51238 dispatch window WINDOW_LIST. */
51240 static void
51241 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
51243 int byte_len = min_insn_size (insn);
51244 int num_insn = window_list->num_insn;
51245 int imm_size;
51246 sched_insn_info *window = window_list->window;
51247 enum dispatch_group group = get_insn_group (insn);
51248 enum insn_path path = get_insn_path (insn);
51249 int num_imm_operand;
51250 int num_imm32_operand;
51251 int num_imm64_operand;
51253 if (!window_list->violation && group != disp_cmp
51254 && !fits_dispatch_window (insn))
51255 window_list->violation = true;
51257 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51258 &num_imm64_operand);
51260 /* Initialize window with new instruction. */
51261 window[num_insn].insn = insn;
51262 window[num_insn].byte_len = byte_len;
51263 window[num_insn].group = group;
51264 window[num_insn].path = path;
51265 window[num_insn].imm_bytes = imm_size;
51267 window_list->window_size += byte_len;
51268 window_list->num_insn = num_insn + 1;
51269 window_list->num_uops = window_list->num_uops + num_uops;
51270 window_list->imm_size += imm_size;
51271 window_list->num_imm += num_imm_operand;
51272 window_list->num_imm_32 += num_imm32_operand;
51273 window_list->num_imm_64 += num_imm64_operand;
51275 if (group == disp_store)
51276 window_list->num_stores += 1;
51277 else if (group == disp_load
51278 || group == disp_prefetch)
51279 window_list->num_loads += 1;
51280 else if (group == disp_load_store)
51282 window_list->num_stores += 1;
51283 window_list->num_loads += 1;
51287 /* Adds a scheduled instruction, INSN, to the current dispatch window.
51288 If the total bytes of instructions or the number of instructions in
51289 the window exceed the allowable limits, it allocates a new window. */
51291 static void
51292 add_to_dispatch_window (rtx_insn *insn)
51294 int byte_len;
51295 dispatch_windows *window_list;
51296 dispatch_windows *next_list;
51297 dispatch_windows *window0_list;
51298 enum insn_path path;
51299 enum dispatch_group insn_group;
51300 bool insn_fits;
51301 int num_insn;
51302 int num_uops;
51303 int window_num;
51304 int insn_num_uops;
51305 int sum;
51307 if (INSN_CODE (insn) < 0)
51308 return;
51310 byte_len = min_insn_size (insn);
51311 window_list = dispatch_window_list;
51312 next_list = window_list->next;
51313 path = get_insn_path (insn);
51314 insn_group = get_insn_group (insn);
51316 /* Get the last dispatch window. */
51317 if (next_list)
51318 window_list = dispatch_window_list->next;
51320 if (path == path_single)
51321 insn_num_uops = 1;
51322 else if (path == path_double)
51323 insn_num_uops = 2;
51324 else
51325 insn_num_uops = (int) path;
51327 /* If current window is full, get a new window.
51328 Window number zero is full if MAX_INSN uops are scheduled in it.
51329 Window number one is full if window zero's bytes plus window
51330 one's bytes total 32, or if adding the bytes of the new instruction
51331 makes the total greater than 48, or if it already has MAX_INSN
51332 instructions in it. */
51333 num_insn = window_list->num_insn;
51334 num_uops = window_list->num_uops;
51335 window_num = window_list->window_num;
51336 insn_fits = fits_dispatch_window (insn);
51338 if (num_insn >= MAX_INSN
51339 || num_uops + insn_num_uops > MAX_INSN
51340 || !(insn_fits))
51342 window_num = ~window_num & 1;
51343 window_list = allocate_next_window (window_num);
51346 if (window_num == 0)
51348 add_insn_window (insn, window_list, insn_num_uops);
51349 if (window_list->num_insn >= MAX_INSN
51350 && insn_group == disp_branch)
51352 process_end_window ();
51353 return;
51356 else if (window_num == 1)
51358 window0_list = window_list->prev;
51359 sum = window0_list->window_size + window_list->window_size;
51360 if (sum == 32
51361 || (byte_len + sum) >= 48)
51363 process_end_window ();
51364 window_list = dispatch_window_list;
51367 add_insn_window (insn, window_list, insn_num_uops);
51369 else
51370 gcc_unreachable ();
51372 if (is_end_basic_block (insn_group))
51374 /* End of basic block is reached; do end-of-basic-block processing. */
51375 process_end_window ();
51376 return;
51380 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51382 DEBUG_FUNCTION static void
51383 debug_dispatch_window_file (FILE *file, int window_num)
51385 dispatch_windows *list;
51386 int i;
51388 if (window_num == 0)
51389 list = dispatch_window_list;
51390 else
51391 list = dispatch_window_list1;
51393 fprintf (file, "Window #%d:\n", list->window_num);
51394 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51395 list->num_insn, list->num_uops, list->window_size);
51396 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51397 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51399 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51400 list->num_stores);
51401 fprintf (file, " insn info:\n");
51403 for (i = 0; i < MAX_INSN; i++)
51405 if (!list->window[i].insn)
51406 break;
51407 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51408 i, group_name[list->window[i].group],
51409 i, (void *)list->window[i].insn,
51410 i, list->window[i].path,
51411 i, list->window[i].byte_len,
51412 i, list->window[i].imm_bytes);
51416 /* Print to stdout a dispatch window. */
51418 DEBUG_FUNCTION void
51419 debug_dispatch_window (int window_num)
51421 debug_dispatch_window_file (stdout, window_num);
51424 /* Print INSN dispatch information to FILE. */
51426 DEBUG_FUNCTION static void
51427 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51429 int byte_len;
51430 enum insn_path path;
51431 enum dispatch_group group;
51432 int imm_size;
51433 int num_imm_operand;
51434 int num_imm32_operand;
51435 int num_imm64_operand;
51437 if (INSN_CODE (insn) < 0)
51438 return;
51440 byte_len = min_insn_size (insn);
51441 path = get_insn_path (insn);
51442 group = get_insn_group (insn);
51443 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51444 &num_imm64_operand);
51446 fprintf (file, " insn info:\n");
51447 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51448 group_name[group], path, byte_len);
51449 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51450 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51453 /* Print to stdout the status of the ready list with respect to
51454 dispatch windows. */
51456 DEBUG_FUNCTION void
51457 debug_ready_dispatch (void)
51459 int i;
51460 int no_ready = number_in_ready ();
51462 fprintf (stdout, "Number of ready: %d\n", no_ready);
51464 for (i = 0; i < no_ready; i++)
51465 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51468 /* This routine is the driver of the dispatch scheduler. */
51470 static void
51471 do_dispatch (rtx_insn *insn, int mode)
51473 if (mode == DISPATCH_INIT)
51474 init_dispatch_sched ();
51475 else if (mode == ADD_TO_DISPATCH_WINDOW)
51476 add_to_dispatch_window (insn);
51479 /* Return TRUE if Dispatch Scheduling is supported. */
51481 static bool
51482 has_dispatch (rtx_insn *insn, int action)
51484 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51485 && flag_dispatch_scheduler)
51486 switch (action)
51488 default:
51489 return false;
51491 case IS_DISPATCH_ON:
51492 return true;
51493 break;
51495 case IS_CMP:
51496 return is_cmp (insn);
51498 case DISPATCH_VIOLATION:
51499 return dispatch_violation ();
51501 case FITS_DISPATCH_WINDOW:
51502 return fits_dispatch_window (insn);
51505 return false;
51508 /* Implementation of reassociation_width target hook used by
51509 reassoc phase to identify parallelism level in reassociated
51510 tree. The statement's tree_code is passed in OPC. The arguments'
51511 type is passed in MODE.
51513 Currently parallel reassociation is enabled for Atom
51514 processors only and we set reassociation width to be 2
51515 because Atom may issue up to 2 instructions per cycle.
51517 Return value should be fixed if parallel reassociation is
51518 enabled for other processors. */
51520 static int
51521 ix86_reassociation_width (unsigned int, machine_mode mode)
51523 /* Vector part. */
51524 if (VECTOR_MODE_P (mode))
51526 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51527 return 2;
51528 else
51529 return 1;
51532 /* Scalar part. */
51533 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51534 return 2;
51535 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51536 return 2;
51537 else
51538 return 1;
51541 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51542 place emms and femms instructions. */
51544 static machine_mode
51545 ix86_preferred_simd_mode (machine_mode mode)
51547 if (!TARGET_SSE)
51548 return word_mode;
51550 switch (mode)
51552 case QImode:
51553 return TARGET_AVX512BW ? V64QImode :
51554 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51555 case HImode:
51556 return TARGET_AVX512BW ? V32HImode :
51557 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51558 case SImode:
51559 return TARGET_AVX512F ? V16SImode :
51560 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51561 case DImode:
51562 return TARGET_AVX512F ? V8DImode :
51563 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51565 case SFmode:
51566 if (TARGET_AVX512F)
51567 return V16SFmode;
51568 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51569 return V8SFmode;
51570 else
51571 return V4SFmode;
51573 case DFmode:
51574 if (!TARGET_VECTORIZE_DOUBLE)
51575 return word_mode;
51576 else if (TARGET_AVX512F)
51577 return V8DFmode;
51578 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51579 return V4DFmode;
51580 else if (TARGET_SSE2)
51581 return V2DFmode;
51582 /* FALLTHRU */
51584 default:
51585 return word_mode;
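/* For example (illustrative): with -mavx2 and without -mprefer-avx128,
   SImode elements are vectorized in V8SImode (eight ints per 256-bit
   vector); plain -msse2 gives V4SImode, and -mavx512f would give
   V16SImode for the same loop.  */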
51589 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51590 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51591 256bit and 128bit vectors. */
51593 static unsigned int
51594 ix86_autovectorize_vector_sizes (void)
51596 return TARGET_AVX512F ? 64 | 32 | 16 :
51597 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51602 /* Return class of registers which could be used for pseudo of MODE
51603 and of class RCLASS for spilling instead of memory. Return NO_REGS
51604 if it is not possible or non-profitable. */
51605 static reg_class_t
51606 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51608 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51609 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51610 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51611 return ALL_SSE_REGS;
51612 return NO_REGS;
51615 /* Implement targetm.vectorize.init_cost. */
51617 static void *
51618 ix86_init_cost (struct loop *)
51620 unsigned *cost = XNEWVEC (unsigned, 3);
51621 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51622 return cost;
51625 /* Implement targetm.vectorize.add_stmt_cost. */
51627 static unsigned
51628 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51629 struct _stmt_vec_info *stmt_info, int misalign,
51630 enum vect_cost_model_location where)
51632 unsigned *cost = (unsigned *) data;
51633 unsigned retval = 0;
51635 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51636 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51638 /* Statements in an inner loop relative to the loop being
51639 vectorized are weighted more heavily. The value here is
51640 arbitrary and could potentially be improved with analysis. */
51641 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51642 count *= 50; /* FIXME. */
51644 retval = (unsigned) (count * stmt_cost);
51646 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
51647 for Silvermont, as it has an out-of-order integer pipeline and can execute
51648 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51649 if (TARGET_SILVERMONT || TARGET_INTEL)
51650 if (stmt_info && stmt_info->stmt)
51652 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51653 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51654 retval = (retval * 17) / 10;
51657 cost[where] += retval;
51659 return retval;
51662 /* Implement targetm.vectorize.finish_cost. */
51664 static void
51665 ix86_finish_cost (void *data, unsigned *prologue_cost,
51666 unsigned *body_cost, unsigned *epilogue_cost)
51668 unsigned *cost = (unsigned *) data;
51669 *prologue_cost = cost[vect_prologue];
51670 *body_cost = cost[vect_body];
51671 *epilogue_cost = cost[vect_epilogue];
51674 /* Implement targetm.vectorize.destroy_cost_data. */
51676 static void
51677 ix86_destroy_cost_data (void *data)
51679 free (data);
51682 /* Validate target specific memory model bits in VAL. */
51684 static unsigned HOST_WIDE_INT
51685 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51687 enum memmodel model = memmodel_from_int (val);
51688 bool strong;
51690 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51691 |MEMMODEL_MASK)
51692 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51694 warning (OPT_Winvalid_memory_model,
51695 "Unknown architecture specific memory model");
51696 return MEMMODEL_SEQ_CST;
51698 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51699 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51701 warning (OPT_Winvalid_memory_model,
51702 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51703 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51705 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51707 warning (OPT_Winvalid_memory_model,
51708 "HLE_RELEASE not used with RELEASE or stronger memory model");
51709 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51711 return val;
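/* The HLE bits checked here are the ones a user ORs into the C11 memory
   model, along the lines of the HLE example in the GCC manual
   (illustrative only):

     while (__atomic_exchange_n (&lockvar, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;                                  // xacquire lock xchg
     ... critical section ...
     __atomic_store_n (&lockvar, 0,
                       __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);  // xrelease

   Pairing HLE_ACQUIRE with a weaker-than-acquire model (or HLE_RELEASE
   with a weaker-than-release one) falls into the warning paths above.  */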
51714 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51715 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51716 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51717 or number of vecsize_mangle variants that should be emitted. */
51719 static int
51720 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51721 struct cgraph_simd_clone *clonei,
51722 tree base_type, int num)
51724 int ret = 1;
51726 if (clonei->simdlen
51727 && (clonei->simdlen < 2
51728 || clonei->simdlen > 16
51729 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51731 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51732 "unsupported simdlen %d", clonei->simdlen);
51733 return 0;
51736 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51737 if (TREE_CODE (ret_type) != VOID_TYPE)
51738 switch (TYPE_MODE (ret_type))
51740 case QImode:
51741 case HImode:
51742 case SImode:
51743 case DImode:
51744 case SFmode:
51745 case DFmode:
51746 /* case SCmode: */
51747 /* case DCmode: */
51748 break;
51749 default:
51750 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51751 "unsupported return type %qT for simd\n", ret_type);
51752 return 0;
51755 tree t;
51756 int i;
51758 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51759 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51760 switch (TYPE_MODE (TREE_TYPE (t)))
51762 case QImode:
51763 case HImode:
51764 case SImode:
51765 case DImode:
51766 case SFmode:
51767 case DFmode:
51768 /* case SCmode: */
51769 /* case DCmode: */
51770 break;
51771 default:
51772 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51773 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51774 return 0;
51777 if (clonei->cilk_elemental)
51779 /* Parse the processor clause here. If not present, default to 'b'. */
51780 clonei->vecsize_mangle = 'b';
51782 else if (!TREE_PUBLIC (node->decl))
51784 /* If the function isn't exported, we can pick up just one ISA
51785 for the clones. */
51786 if (TARGET_AVX2)
51787 clonei->vecsize_mangle = 'd';
51788 else if (TARGET_AVX)
51789 clonei->vecsize_mangle = 'c';
51790 else
51791 clonei->vecsize_mangle = 'b';
51792 ret = 1;
51794 else
51796 clonei->vecsize_mangle = "bcd"[num];
51797 ret = 3;
51799 switch (clonei->vecsize_mangle)
51801 case 'b':
51802 clonei->vecsize_int = 128;
51803 clonei->vecsize_float = 128;
51804 break;
51805 case 'c':
51806 clonei->vecsize_int = 128;
51807 clonei->vecsize_float = 256;
51808 break;
51809 case 'd':
51810 clonei->vecsize_int = 256;
51811 clonei->vecsize_float = 256;
51812 break;
51814 if (clonei->simdlen == 0)
51816 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51817 clonei->simdlen = clonei->vecsize_int;
51818 else
51819 clonei->simdlen = clonei->vecsize_float;
51820 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51821 if (clonei->simdlen > 16)
51822 clonei->simdlen = 16;
51824 return ret;
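/* For example (illustrative; the mangling letters follow the x86 vector
   ABI): an exported function marked with #pragma omp declare simd whose
   characteristic type is float gets RET = 3 clones from this hook -- the
   'b' (SSE, simdlen 4), 'c' (AVX, simdlen 8) and 'd' (AVX2, simdlen 8)
   variants -- while a non-exported function gets a single clone for the
   best ISA enabled by the current target flags.  */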
51827 /* Add target attribute to SIMD clone NODE if needed. */
51829 static void
51830 ix86_simd_clone_adjust (struct cgraph_node *node)
51832 const char *str = NULL;
51833 gcc_assert (node->decl == cfun->decl);
51834 switch (node->simdclone->vecsize_mangle)
51836 case 'b':
51837 if (!TARGET_SSE2)
51838 str = "sse2";
51839 break;
51840 case 'c':
51841 if (!TARGET_AVX)
51842 str = "avx";
51843 break;
51844 case 'd':
51845 if (!TARGET_AVX2)
51846 str = "avx2";
51847 break;
51848 default:
51849 gcc_unreachable ();
51851 if (str == NULL)
51852 return;
51853 push_cfun (NULL);
51854 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51855 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51856 gcc_assert (ok);
51857 pop_cfun ();
51858 ix86_reset_previous_fndecl ();
51859 ix86_set_current_function (node->decl);
51862 /* If SIMD clone NODE can't be used in a vectorized loop
51863 in current function, return -1, otherwise return a badness of using it
51864 (0 if it is most desirable from vecsize_mangle point of view, 1
51865 slightly less desirable, etc.). */
51867 static int
51868 ix86_simd_clone_usable (struct cgraph_node *node)
51870 switch (node->simdclone->vecsize_mangle)
51872 case 'b':
51873 if (!TARGET_SSE2)
51874 return -1;
51875 if (!TARGET_AVX)
51876 return 0;
51877 return TARGET_AVX2 ? 2 : 1;
51878 case 'c':
51879 if (!TARGET_AVX)
51880 return -1;
51881 return TARGET_AVX2 ? 1 : 0;
51882 break;
51883 case 'd':
51884 if (!TARGET_AVX2)
51885 return -1;
51886 return 0;
51887 default:
51888 gcc_unreachable ();
51892 /* This function adjusts the unroll factor based on
51893 the hardware capabilities. For example, bdver3 has
51894 a loop buffer which makes unrolling of smaller
51895 loops less important. This function decides the
51896 unroll factor using the number of memory references
51897 (the value 32 is used) as a heuristic. */
51899 static unsigned
51900 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51902 basic_block *bbs;
51903 rtx_insn *insn;
51904 unsigned i;
51905 unsigned mem_count = 0;
51907 if (!TARGET_ADJUST_UNROLL)
51908 return nunroll;
51910 /* Count the number of memory references within the loop body.
51911 This value determines the unrolling factor for bdver3 and bdver4
51912 architectures. */
51913 subrtx_iterator::array_type array;
51914 bbs = get_loop_body (loop);
51915 for (i = 0; i < loop->num_nodes; i++)
51916 FOR_BB_INSNS (bbs[i], insn)
51917 if (NONDEBUG_INSN_P (insn))
51918 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
51919 if (const_rtx x = *iter)
51920 if (MEM_P (x))
51922 machine_mode mode = GET_MODE (x);
51923 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51924 if (n_words > 4)
51925 mem_count += 2;
51926 else
51927 mem_count += 1;
51929 free (bbs);
51931 if (mem_count && mem_count <= 32)
51932 return 32 / mem_count;
51934 return nunroll;
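/* For example (illustrative): on bdver3/bdver4 a loop body with 8 counted
   memory references is given an unroll factor of 32 / 8 = 4 (references
   wider than four words count double); a loop with more than 32 such
   references keeps the caller's NUNROLL.  */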
51938 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51940 static bool
51941 ix86_float_exceptions_rounding_supported_p (void)
51943 /* For x87 floating point with standard excess precision handling,
51944 there is no adddf3 pattern (since x87 floating point only has
51945 XFmode operations) so the default hook implementation gets this
51946 wrong. */
51947 return TARGET_80387 || TARGET_SSE_MATH;
51950 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51952 static void
51953 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51955 if (!TARGET_80387 && !TARGET_SSE_MATH)
51956 return;
51957 tree exceptions_var = create_tmp_var (integer_type_node);
51958 if (TARGET_80387)
51960 tree fenv_index_type = build_index_type (size_int (6));
51961 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51962 tree fenv_var = create_tmp_var (fenv_type);
51963 mark_addressable (fenv_var);
51964 tree fenv_ptr = build_pointer_type (fenv_type);
51965 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51966 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51967 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51968 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51969 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51970 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51971 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51972 tree hold_fnclex = build_call_expr (fnclex, 0);
51973 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51974 hold_fnclex);
51975 *clear = build_call_expr (fnclex, 0);
51976 tree sw_var = create_tmp_var (short_unsigned_type_node);
51977 tree fnstsw_call = build_call_expr (fnstsw, 0);
51978 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51979 sw_var, fnstsw_call);
51980 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51981 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51982 exceptions_var, exceptions_x87);
51983 *update = build2 (COMPOUND_EXPR, integer_type_node,
51984 sw_mod, update_mod);
51985 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51986 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51988 if (TARGET_SSE_MATH)
51990 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51991 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51992 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51993 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51994 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51995 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51996 mxcsr_orig_var, stmxcsr_hold_call);
51997 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51998 mxcsr_orig_var,
51999 build_int_cst (unsigned_type_node, 0x1f80));
52000 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
52001 build_int_cst (unsigned_type_node, 0xffffffc0));
52002 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
52003 mxcsr_mod_var, hold_mod_val);
52004 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52005 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
52006 hold_assign_orig, hold_assign_mod);
52007 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
52008 ldmxcsr_hold_call);
52009 if (*hold)
52010 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
52011 else
52012 *hold = hold_all;
52013 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52014 if (*clear)
52015 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
52016 ldmxcsr_clear_call);
52017 else
52018 *clear = ldmxcsr_clear_call;
52019 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
52020 tree exceptions_sse = fold_convert (integer_type_node,
52021 stxmcsr_update_call);
52022 if (*update)
52024 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
52025 exceptions_var, exceptions_sse);
52026 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
52027 exceptions_var, exceptions_mod);
52028 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
52029 exceptions_assign);
52031 else
52032 *update = build2 (MODIFY_EXPR, integer_type_node,
52033 exceptions_var, exceptions_sse);
52034 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
52035 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52036 ldmxcsr_update_call);
52038 tree atomic_feraiseexcept
52039 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
52040 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
52041 1, exceptions_var);
52042 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52043 atomic_feraiseexcept_call);
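/* In rough C-like pseudocode (illustrative sketch of the trees built
   above), the three sequences are:

     hold:    fnstenv (&saved_x87); fnclex ();
              orig = stmxcsr (); mod = (orig | 0x1f80) & ~0x3f;  // mask all, clear flags
              ldmxcsr (mod);
     clear:   fnclex (); ldmxcsr (mod);
     update:  exc  = fnstsw ();          // x87 exception flags
              fldenv (&saved_x87);
              exc |= stmxcsr ();         // SSE exception flags
              ldmxcsr (orig);
              __atomic_feraiseexcept (exc);  */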
52046 /* Return mode to be used for bounds or VOIDmode
52047 if bounds are not supported. */
52049 static enum machine_mode
52050 ix86_mpx_bound_mode ()
52052 /* Do not support pointer checker if MPX
52053 is not enabled. */
52054 if (!TARGET_MPX)
52056 if (flag_check_pointer_bounds)
52057 warning (0, "Pointer Checker requires MPX support on this target."
52058 " Use -mmpx options to enable MPX.");
52059 return VOIDmode;
52062 return BNDmode;
52065 /* Return constant used to statically initialize constant bounds.
52067 This function is used to create special bound values. For now
52068 only INIT bounds and NONE bounds are expected. More special
52069 values may be added later. */
52071 static tree
52072 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
52074 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
52075 : build_zero_cst (pointer_sized_int_node);
52076 tree high = ub ? build_zero_cst (pointer_sized_int_node)
52077 : build_minus_one_cst (pointer_sized_int_node);
52079 /* This function is supposed to be used to create INIT and
52080 NONE bounds only. */
52081 gcc_assert ((lb == 0 && ub == -1)
52082 || (lb == -1 && ub == 0));
52084 return build_complex (NULL, low, high);
52085 }
52087 /* Generate a list of statements STMTS to initialize pointer bounds
52088 variable VAR with bounds LB and UB. Return the number of generated
52089 statements. */
52091 static int
52092 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
52093 {
52094 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
52095 tree lhs, modify, var_p;
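/* The two pointer-sized words at VAR hold the lower bound and the upper
   bound in one's complement form, so UB is complemented before it is
   stored.  */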
52097 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
52098 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
52100 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
52101 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
52102 append_to_statement_list (modify, stmts);
52104 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
52105 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
52106 TYPE_SIZE_UNIT (pointer_sized_int_node)));
52107 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
52108 append_to_statement_list (modify, stmts);
52110 return 2;
52111 }
52113 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
52114 /* For i386, a common symbol is local only for non-PIE binaries. For
52115 x86-64, a common symbol is local for non-PIE binaries, or for PIE
52116 binaries when the linker supports copy relocations. */
52118 static bool
52119 ix86_binds_local_p (const_tree exp)
52120 {
52121 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
52122 (!flag_pic
52123 || (TARGET_64BIT
52124 && HAVE_LD_PIE_COPYRELOC != 0)));
52125 }
52126 #endif
52128 /* If MEM is in the form of [base+offset], extract the two parts of the
52129 address into BASE and OFFSET and return true; otherwise return false. */
52131 static bool
52132 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
52133 {
52134 rtx addr;
52136 gcc_assert (MEM_P (mem));
52138 addr = XEXP (mem, 0);
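/* Strip a CONST wrapper so that (const (plus (symbol_ref) (const_int)))
   is handled the same way as a bare PLUS.  */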
52140 if (GET_CODE (addr) == CONST)
52141 addr = XEXP (addr, 0);
52143 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
52144 {
52145 *base = addr;
52146 *offset = const0_rtx;
52147 return true;
52148 }
52150 if (GET_CODE (addr) == PLUS
52151 && (REG_P (XEXP (addr, 0))
52152 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
52153 && CONST_INT_P (XEXP (addr, 1)))
52154 {
52155 *base = XEXP (addr, 0);
52156 *offset = XEXP (addr, 1);
52157 return true;
52158 }
52160 return false;
52161 }
52163 /* Given OPERANDS of consecutive load/store instructions, check whether we
52164 can merge them into a move-multiple. LOAD is true if they are loads.
52165 MODE is the mode of the memory operands. */
52167 bool
52168 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
52169 enum machine_mode mode)
52170 {
52171 HOST_WIDE_INT offval_1, offval_2, msize;
52172 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
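/* For loads OPERANDS is { reg, mem, reg, mem }; for stores it is
   { mem, reg, mem, reg }.  */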
52174 if (load)
52175 {
52176 mem_1 = operands[1];
52177 mem_2 = operands[3];
52178 reg_1 = operands[0];
52179 reg_2 = operands[2];
52180 }
52181 else
52182 {
52183 mem_1 = operands[0];
52184 mem_2 = operands[2];
52185 reg_1 = operands[1];
52186 reg_2 = operands[3];
52187 }
52189 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
52191 if (REGNO (reg_1) != REGNO (reg_2))
52192 return false;
52194 /* Check if the addresses are in the form of [base+offset]. */
52195 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
52196 return false;
52197 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
52198 return false;
52200 /* Check if the bases are the same. */
52201 if (!rtx_equal_p (base_1, base_2))
52202 return false;
52204 offval_1 = INTVAL (offset_1);
52205 offval_2 = INTVAL (offset_2);
52206 msize = GET_MODE_SIZE (mode);
52207 /* Check if mem_1 is adjacent to mem_2 and mem_1 has the lower address. */
52208 if (offval_1 + msize != offval_2)
52209 return false;
52211 return true;
52212 }
52214 /* Initialize the GCC target structure. */
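/* Each hook is first #undef'ed so that the i386-specific definition below
   replaces the default supplied by target-def.h before TARGET_INITIALIZER
   is expanded at the end of this file.  */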
52215 #undef TARGET_RETURN_IN_MEMORY
52216 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
52218 #undef TARGET_LEGITIMIZE_ADDRESS
52219 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
52221 #undef TARGET_ATTRIBUTE_TABLE
52222 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
52223 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
52224 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
52225 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52226 # undef TARGET_MERGE_DECL_ATTRIBUTES
52227 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
52228 #endif
52230 #undef TARGET_COMP_TYPE_ATTRIBUTES
52231 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
52233 #undef TARGET_INIT_BUILTINS
52234 #define TARGET_INIT_BUILTINS ix86_init_builtins
52235 #undef TARGET_BUILTIN_DECL
52236 #define TARGET_BUILTIN_DECL ix86_builtin_decl
52237 #undef TARGET_EXPAND_BUILTIN
52238 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
52240 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
52241 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
52242 ix86_builtin_vectorized_function
52244 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
52245 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
52247 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
52248 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
52250 #undef TARGET_VECTORIZE_BUILTIN_GATHER
52251 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
52253 #undef TARGET_BUILTIN_RECIPROCAL
52254 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
52256 #undef TARGET_ASM_FUNCTION_EPILOGUE
52257 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
52259 #undef TARGET_ENCODE_SECTION_INFO
52260 #ifndef SUBTARGET_ENCODE_SECTION_INFO
52261 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
52262 #else
52263 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
52264 #endif
52266 #undef TARGET_ASM_OPEN_PAREN
52267 #define TARGET_ASM_OPEN_PAREN ""
52268 #undef TARGET_ASM_CLOSE_PAREN
52269 #define TARGET_ASM_CLOSE_PAREN ""
52271 #undef TARGET_ASM_BYTE_OP
52272 #define TARGET_ASM_BYTE_OP ASM_BYTE
52274 #undef TARGET_ASM_ALIGNED_HI_OP
52275 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
52276 #undef TARGET_ASM_ALIGNED_SI_OP
52277 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
52278 #ifdef ASM_QUAD
52279 #undef TARGET_ASM_ALIGNED_DI_OP
52280 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
52281 #endif
52283 #undef TARGET_PROFILE_BEFORE_PROLOGUE
52284 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
52286 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
52287 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
52289 #undef TARGET_ASM_UNALIGNED_HI_OP
52290 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
52291 #undef TARGET_ASM_UNALIGNED_SI_OP
52292 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
52293 #undef TARGET_ASM_UNALIGNED_DI_OP
52294 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
52296 #undef TARGET_PRINT_OPERAND
52297 #define TARGET_PRINT_OPERAND ix86_print_operand
52298 #undef TARGET_PRINT_OPERAND_ADDRESS
52299 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
52300 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
52301 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
52302 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
52303 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
52305 #undef TARGET_SCHED_INIT_GLOBAL
52306 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
52307 #undef TARGET_SCHED_ADJUST_COST
52308 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
52309 #undef TARGET_SCHED_ISSUE_RATE
52310 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
52311 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52312 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
52313 ia32_multipass_dfa_lookahead
52314 #undef TARGET_SCHED_MACRO_FUSION_P
52315 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
52316 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
52317 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
52319 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
52320 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
52322 #undef TARGET_MEMMODEL_CHECK
52323 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
52325 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
52326 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
52328 #ifdef HAVE_AS_TLS
52329 #undef TARGET_HAVE_TLS
52330 #define TARGET_HAVE_TLS true
52331 #endif
52332 #undef TARGET_CANNOT_FORCE_CONST_MEM
52333 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
52334 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
52335 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
52337 #undef TARGET_DELEGITIMIZE_ADDRESS
52338 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
52340 #undef TARGET_MS_BITFIELD_LAYOUT_P
52341 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
52343 #if TARGET_MACHO
52344 #undef TARGET_BINDS_LOCAL_P
52345 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
52346 #else
52347 #undef TARGET_BINDS_LOCAL_P
52348 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
52349 #endif
52350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52351 #undef TARGET_BINDS_LOCAL_P
52352 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
52353 #endif
52355 #undef TARGET_ASM_OUTPUT_MI_THUNK
52356 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
52357 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
52358 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
52360 #undef TARGET_ASM_FILE_START
52361 #define TARGET_ASM_FILE_START x86_file_start
52363 #undef TARGET_OPTION_OVERRIDE
52364 #define TARGET_OPTION_OVERRIDE ix86_option_override
52366 #undef TARGET_REGISTER_MOVE_COST
52367 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
52368 #undef TARGET_MEMORY_MOVE_COST
52369 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
52370 #undef TARGET_RTX_COSTS
52371 #define TARGET_RTX_COSTS ix86_rtx_costs
52372 #undef TARGET_ADDRESS_COST
52373 #define TARGET_ADDRESS_COST ix86_address_cost
52375 #undef TARGET_FIXED_CONDITION_CODE_REGS
52376 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
52377 #undef TARGET_CC_MODES_COMPATIBLE
52378 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
52380 #undef TARGET_MACHINE_DEPENDENT_REORG
52381 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
52383 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
52384 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
52386 #undef TARGET_BUILD_BUILTIN_VA_LIST
52387 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
52389 #undef TARGET_FOLD_BUILTIN
52390 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
52392 #undef TARGET_COMPARE_VERSION_PRIORITY
52393 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
52395 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
52396 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
52397 ix86_generate_version_dispatcher_body
52399 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
52400 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
52401 ix86_get_function_versions_dispatcher
52403 #undef TARGET_ENUM_VA_LIST_P
52404 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
52406 #undef TARGET_FN_ABI_VA_LIST
52407 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
52409 #undef TARGET_CANONICAL_VA_LIST_TYPE
52410 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
52412 #undef TARGET_EXPAND_BUILTIN_VA_START
52413 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
52415 #undef TARGET_MD_ASM_ADJUST
52416 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
52418 #undef TARGET_PROMOTE_PROTOTYPES
52419 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
52420 #undef TARGET_SETUP_INCOMING_VARARGS
52421 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52422 #undef TARGET_MUST_PASS_IN_STACK
52423 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52424 #undef TARGET_FUNCTION_ARG_ADVANCE
52425 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52426 #undef TARGET_FUNCTION_ARG
52427 #define TARGET_FUNCTION_ARG ix86_function_arg
52428 #undef TARGET_INIT_PIC_REG
52429 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52430 #undef TARGET_USE_PSEUDO_PIC_REG
52431 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52432 #undef TARGET_FUNCTION_ARG_BOUNDARY
52433 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52434 #undef TARGET_PASS_BY_REFERENCE
52435 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52436 #undef TARGET_INTERNAL_ARG_POINTER
52437 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52438 #undef TARGET_UPDATE_STACK_BOUNDARY
52439 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52440 #undef TARGET_GET_DRAP_RTX
52441 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52442 #undef TARGET_STRICT_ARGUMENT_NAMING
52443 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52444 #undef TARGET_STATIC_CHAIN
52445 #define TARGET_STATIC_CHAIN ix86_static_chain
52446 #undef TARGET_TRAMPOLINE_INIT
52447 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52448 #undef TARGET_RETURN_POPS_ARGS
52449 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52451 #undef TARGET_LEGITIMATE_COMBINED_INSN
52452 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52454 #undef TARGET_ASAN_SHADOW_OFFSET
52455 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52457 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52458 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
52460 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52461 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52463 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52464 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52466 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52467 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52468 ix86_libgcc_floating_mode_supported_p
52470 #undef TARGET_C_MODE_FOR_SUFFIX
52471 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52473 #ifdef HAVE_AS_TLS
52474 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52475 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52476 #endif
52478 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52479 #undef TARGET_INSERT_ATTRIBUTES
52480 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52481 #endif
52483 #undef TARGET_MANGLE_TYPE
52484 #define TARGET_MANGLE_TYPE ix86_mangle_type
52486 #if !TARGET_MACHO
52487 #undef TARGET_STACK_PROTECT_FAIL
52488 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
52489 #endif
52491 #undef TARGET_FUNCTION_VALUE
52492 #define TARGET_FUNCTION_VALUE ix86_function_value
52494 #undef TARGET_FUNCTION_VALUE_REGNO_P
52495 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52497 #undef TARGET_PROMOTE_FUNCTION_MODE
52498 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52500 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
52501 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
52503 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52504 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52506 #undef TARGET_INSTANTIATE_DECLS
52507 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
52509 #undef TARGET_SECONDARY_RELOAD
52510 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52512 #undef TARGET_CLASS_MAX_NREGS
52513 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52515 #undef TARGET_PREFERRED_RELOAD_CLASS
52516 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52517 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52518 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52519 #undef TARGET_CLASS_LIKELY_SPILLED_P
52520 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
52522 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52523 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52524 ix86_builtin_vectorization_cost
52525 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52526 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52527 ix86_vectorize_vec_perm_const_ok
52528 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52529 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52530 ix86_preferred_simd_mode
52531 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52532 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52533 ix86_autovectorize_vector_sizes
52534 #undef TARGET_VECTORIZE_INIT_COST
52535 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52536 #undef TARGET_VECTORIZE_ADD_STMT_COST
52537 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52538 #undef TARGET_VECTORIZE_FINISH_COST
52539 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52540 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52541 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
52543 #undef TARGET_SET_CURRENT_FUNCTION
52544 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52546 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52547 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52549 #undef TARGET_OPTION_SAVE
52550 #define TARGET_OPTION_SAVE ix86_function_specific_save
52552 #undef TARGET_OPTION_RESTORE
52553 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52555 #undef TARGET_OPTION_POST_STREAM_IN
52556 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52558 #undef TARGET_OPTION_PRINT
52559 #define TARGET_OPTION_PRINT ix86_function_specific_print
52561 #undef TARGET_OPTION_FUNCTION_VERSIONS
52562 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52564 #undef TARGET_CAN_INLINE_P
52565 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52567 #undef TARGET_EXPAND_TO_RTL_HOOK
52568 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
52570 #undef TARGET_LEGITIMATE_ADDRESS_P
52571 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52573 #undef TARGET_LRA_P
52574 #define TARGET_LRA_P hook_bool_void_true
52576 #undef TARGET_REGISTER_PRIORITY
52577 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52579 #undef TARGET_REGISTER_USAGE_LEVELING_P
52580 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52582 #undef TARGET_LEGITIMATE_CONSTANT_P
52583 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52585 #undef TARGET_FRAME_POINTER_REQUIRED
52586 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52588 #undef TARGET_CAN_ELIMINATE
52589 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52591 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52592 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52594 #undef TARGET_ASM_CODE_END
52595 #define TARGET_ASM_CODE_END ix86_code_end
52597 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52598 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52600 #if TARGET_MACHO
52601 #undef TARGET_INIT_LIBFUNCS
52602 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52603 #endif
52605 #undef TARGET_LOOP_UNROLL_ADJUST
52606 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52608 #undef TARGET_SPILL_CLASS
52609 #define TARGET_SPILL_CLASS ix86_spill_class
52611 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52612 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52613 ix86_simd_clone_compute_vecsize_and_simdlen
52615 #undef TARGET_SIMD_CLONE_ADJUST
52616 #define TARGET_SIMD_CLONE_ADJUST \
52617 ix86_simd_clone_adjust
52619 #undef TARGET_SIMD_CLONE_USABLE
52620 #define TARGET_SIMD_CLONE_USABLE \
52621 ix86_simd_clone_usable
52623 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52624 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52625 ix86_float_exceptions_rounding_supported_p
52627 #undef TARGET_MODE_EMIT
52628 #define TARGET_MODE_EMIT ix86_emit_mode_set
52630 #undef TARGET_MODE_NEEDED
52631 #define TARGET_MODE_NEEDED ix86_mode_needed
52633 #undef TARGET_MODE_AFTER
52634 #define TARGET_MODE_AFTER ix86_mode_after
52636 #undef TARGET_MODE_ENTRY
52637 #define TARGET_MODE_ENTRY ix86_mode_entry
52639 #undef TARGET_MODE_EXIT
52640 #define TARGET_MODE_EXIT ix86_mode_exit
52642 #undef TARGET_MODE_PRIORITY
52643 #define TARGET_MODE_PRIORITY ix86_mode_priority
52645 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52646 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52648 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52649 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52651 #undef TARGET_STORE_BOUNDS_FOR_ARG
52652 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52654 #undef TARGET_LOAD_RETURNED_BOUNDS
52655 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52657 #undef TARGET_STORE_RETURNED_BOUNDS
52658 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52660 #undef TARGET_CHKP_BOUND_MODE
52661 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52663 #undef TARGET_BUILTIN_CHKP_FUNCTION
52664 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52666 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52667 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52669 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52670 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52672 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52673 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52675 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52676 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52678 #undef TARGET_OFFLOAD_OPTIONS
52679 #define TARGET_OFFLOAD_OPTIONS \
52680 ix86_offload_options
52682 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52683 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
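/* Expand TARGET_INITIALIZER (from target-def.h) into the target hook
   vector, picking up the TARGET_* definitions above.  */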
52685 struct gcc_target targetm = TARGET_INITIALIZER;
52687 #include "gt-i386.h"