/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "cfghooks.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "df.h"
#include "alias.h"
#include "fold-const.h"
#include "stringpool.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "tm_p.h"
#include "regs.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "recog.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "target.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cgraph.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "cfgloop.h"
#include "dwarf2.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "opts.h"
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "tree-chkp.h"
#include "rtl-chkp.h"

/* This file should be included last.  */
#include "target-def.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
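/* Illustrative use (a sketch, assuming the mult_init[] and divide[]
   members of struct processor_costs declared in i386.h): the multiply
   and divide cost arrays in the tables below have five entries indexed
   by MODE_INDEX, so MODE_INDEX (SImode) == 2 and the SImode multiply
   start-up cost of the active tuning would be
   ix86_cost->mult_init[MODE_INDEX (SImode)].  */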
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
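/* Worked example of the convention above: with COSTS_N_INSNS (N) == (N) * 4,
   a 2-byte add costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1).  Byte
   counts and instruction counts therefore land on the same scale, and the
   size-tuning table below can be consumed by the same code that consumes
   the speed-tuning tables.  */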
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}

static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
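/* How to read the stringop tables (assumed semantics, following the
   stringop_algs declaration in i386.h): element [0] describes 32-bit
   code and element [1] 64-bit code.  Within an element, the leading
   algorithm is used when the block size is unknown at compile time;
   each {max, alg, noalign} entry then selects ALG for known sizes up
   to MAX bytes, with -1 serving as the catch-all upper bound.  */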
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
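/* A note on how this table is selected (mechanism as in
   ix86_option_override_internal around this revision; stated here as an
   assumption, not verified against this exact tree): when optimizing
   for size, ix86_cost is pointed at ix86_size_cost instead of the tuned
   processor table, so the entries above are byte counts rather than
   latencies.  */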
/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  i386_memcpy,
  i386_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  i486_memcpy,
  i486_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs iamcu_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks the inline loop is still a
   noticeable win; for bigger blocks either rep movsl or rep movsb is the
   way to go.  Rep movsb apparently has more expensive startup time in the
   CPU, but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
		       {8192, rep_prefix_4_byte, false},
		       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
		       {8192, rep_prefix_4_byte, false},
		       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
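/* Reading the memcpy table against the comment above (interpretation per
   the stringop_algs layout noted earlier): for compile-time-known sizes,
   blocks up to 128 bytes use an inline loop, up to 1024 bytes an unrolled
   loop, up to 8192 bytes rep movsl, and anything larger rep movsb;
   unknown sizes fall back to the leading rep_prefix_4_byte entry.  */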
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache  */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  geode_memcpy,
  geode_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  k6_memcpy,
  k6_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* For some reason, Athlon deals better with the REP prefix (relative to
   loops) than K8 does.  Alignment becomes important after 8 bytes for
   memcpy and 128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  athlon_memcpy,
  athlon_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* K8 has optimized REP instructions for medium sized blocks, but for very
   small blocks it is better to use a loop.  For large blocks, a libcall
   can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  k8_memcpy,
  k8_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
/* AMDFAM10 has optimized REP instructions for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  amdfam10_memcpy,
  amdfam10_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* BDVER1 has optimized REP instructions for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver1_memcpy,
  bdver1_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  4,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
/* BDVER2 has optimized REP instructions for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */

static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver2_memcpy,
  bdver2_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  4,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
/* BDVER3 has optimized REP instructions for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver3_memcpy,
  bdver3_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  4,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
/* BDVER4 has optimized REP instructions for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver4_memcpy,
  bdver4_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  4,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
/* BTVER1 has optimized REP instructions for medium sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a
   libcall can do nontemporal accesses and beat inline code considerably.  */
static stringop_algs btver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  32,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  btver1_memcpy,
  btver1_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
1433 static stringop_algs btver2_memcpy[2] = {
1434 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1435 {-1, rep_prefix_4_byte, false}}},
1436 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1437 {-1, libcall, false}}}};
1438 static stringop_algs btver2_memset[2] = {
1439 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1440 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1441 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1442 {-1, libcall, false}}}};
1443 const struct processor_costs btver2_cost = {
1444 COSTS_N_INSNS (1), /* cost of an add instruction */
1445 COSTS_N_INSNS (2), /* cost of a lea instruction */
1446 COSTS_N_INSNS (1), /* variable shift costs */
1447 COSTS_N_INSNS (1), /* constant shift costs */
1448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1449 COSTS_N_INSNS (4), /* HI */
1450 COSTS_N_INSNS (3), /* SI */
1451 COSTS_N_INSNS (4), /* DI */
1452 COSTS_N_INSNS (5)}, /* other */
1453 0, /* cost of multiply per each bit set */
1454 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1455 COSTS_N_INSNS (35), /* HI */
1456 COSTS_N_INSNS (51), /* SI */
1457 COSTS_N_INSNS (83), /* DI */
1458 COSTS_N_INSNS (83)}, /* other */
1459 COSTS_N_INSNS (1), /* cost of movsx */
1460 COSTS_N_INSNS (1), /* cost of movzx */
1461 8, /* "large" insn */
1462 9, /* MOVE_RATIO */
1463 4, /* cost for loading QImode using movzbl */
1464 {3, 4, 3}, /* cost of loading integer registers
1465 in QImode, HImode and SImode.
1466 Relative to reg-reg move (2). */
1467 {3, 4, 3}, /* cost of storing integer registers */
1468 4, /* cost of reg,reg fld/fst */
1469 {4, 4, 12}, /* cost of loading fp registers
1470 in SFmode, DFmode and XFmode */
1471 {6, 6, 8}, /* cost of storing fp registers
1472 in SFmode, DFmode and XFmode */
1473 2, /* cost of moving MMX register */
1474 {3, 3}, /* cost of loading MMX registers
1475 in SImode and DImode */
1476 {4, 4}, /* cost of storing MMX registers
1477 in SImode and DImode */
1478 2, /* cost of moving SSE register */
1479 {4, 4, 3}, /* cost of loading SSE registers
1480 in SImode, DImode and TImode */
1481 {4, 4, 5}, /* cost of storing SSE registers
1482 in SImode, DImode and TImode */
1483 3, /* MMX or SSE register to integer */
1484 /* On K8:
1485 MOVD reg64, xmmreg Double FSTORE 4
1486 MOVD reg32, xmmreg Double FSTORE 4
1487 On AMDFAM10:
1488 MOVD reg64, xmmreg Double FADD 3
1489 1/1 1/1
1490 MOVD reg32, xmmreg Double FADD 3
1491 1/1 1/1 */
1492 32, /* size of l1 cache. */
1493 2048, /* size of l2 cache. */
1494 64, /* size of prefetch block */
1495 100, /* number of parallel prefetches */
1496 2, /* Branch cost */
1497 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1498 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1499 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1500 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1501 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1502 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1503 btver2_memcpy,
1504 btver2_memset,
1505 4, /* scalar_stmt_cost. */
1506 2, /* scalar load_cost. */
1507 2, /* scalar_store_cost. */
1508 6, /* vec_stmt_cost. */
1509 0, /* vec_to_scalar_cost. */
1510 2, /* scalar_to_vec_cost. */
1511 2, /* vec_align_load_cost. */
1512 2, /* vec_unalign_load_cost. */
1513 2, /* vec_store_cost. */
1514 2, /* cond_taken_branch_cost. */
1515 1, /* cond_not_taken_branch_cost. */
1516 };
1518 static stringop_algs pentium4_memcpy[2] = {
1519 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1520 DUMMY_STRINGOP_ALGS};
1521 static stringop_algs pentium4_memset[2] = {
1522 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1523 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1524 DUMMY_STRINGOP_ALGS};
1526 static const
1527 struct processor_costs pentium4_cost = {
1528 COSTS_N_INSNS (1), /* cost of an add instruction */
1529 COSTS_N_INSNS (3), /* cost of a lea instruction */
1530 COSTS_N_INSNS (4), /* variable shift costs */
1531 COSTS_N_INSNS (4), /* constant shift costs */
1532 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1533 COSTS_N_INSNS (15), /* HI */
1534 COSTS_N_INSNS (15), /* SI */
1535 COSTS_N_INSNS (15), /* DI */
1536 COSTS_N_INSNS (15)}, /* other */
1537 0, /* cost of multiply per each bit set */
1538 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1539 COSTS_N_INSNS (56), /* HI */
1540 COSTS_N_INSNS (56), /* SI */
1541 COSTS_N_INSNS (56), /* DI */
1542 COSTS_N_INSNS (56)}, /* other */
1543 COSTS_N_INSNS (1), /* cost of movsx */
1544 COSTS_N_INSNS (1), /* cost of movzx */
1545 16, /* "large" insn */
1546 6, /* MOVE_RATIO */
1547 2, /* cost for loading QImode using movzbl */
1548 {4, 5, 4}, /* cost of loading integer registers
1549 in QImode, HImode and SImode.
1550 Relative to reg-reg move (2). */
1551 {2, 3, 2}, /* cost of storing integer registers */
1552 2, /* cost of reg,reg fld/fst */
1553 {2, 2, 6}, /* cost of loading fp registers
1554 in SFmode, DFmode and XFmode */
1555 {4, 4, 6}, /* cost of storing fp registers
1556 in SFmode, DFmode and XFmode */
1557 2, /* cost of moving MMX register */
1558 {2, 2}, /* cost of loading MMX registers
1559 in SImode and DImode */
1560 {2, 2}, /* cost of storing MMX registers
1561 in SImode and DImode */
1562 12, /* cost of moving SSE register */
1563 {12, 12, 12}, /* cost of loading SSE registers
1564 in SImode, DImode and TImode */
1565 {2, 2, 8}, /* cost of storing SSE registers
1566 in SImode, DImode and TImode */
1567 10, /* MMX or SSE register to integer */
1568 8, /* size of l1 cache. */
1569 256, /* size of l2 cache. */
1570 64, /* size of prefetch block */
1571 6, /* number of parallel prefetches */
1572 2, /* Branch cost */
1573 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1574 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1575 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1576 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1577 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1578 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1579 pentium4_memcpy,
1580 pentium4_memset,
1581 1, /* scalar_stmt_cost. */
1582 1, /* scalar load_cost. */
1583 1, /* scalar_store_cost. */
1584 1, /* vec_stmt_cost. */
1585 1, /* vec_to_scalar_cost. */
1586 1, /* scalar_to_vec_cost. */
1587 1, /* vec_align_load_cost. */
1588 2, /* vec_unalign_load_cost. */
1589 1, /* vec_store_cost. */
1590 3, /* cond_taken_branch_cost. */
1591 1, /* cond_not_taken_branch_cost. */
1592 };
1594 static stringop_algs nocona_memcpy[2] = {
1595 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1596 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1597 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1599 static stringop_algs nocona_memset[2] = {
1600 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1601 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1602 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1603 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs nocona_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1), /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (10), /* HI */
1613 COSTS_N_INSNS (10), /* SI */
1614 COSTS_N_INSNS (10), /* DI */
1615 COSTS_N_INSNS (10)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (66), /* HI */
1619 COSTS_N_INSNS (66), /* SI */
1620 COSTS_N_INSNS (66), /* DI */
1621 COSTS_N_INSNS (66)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 16, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 3, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {4, 4, 4}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 6, /* cost of moving MMX register */
1637 {12, 12}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {12, 12}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 6, /* cost of moving SSE register */
1642 {12, 12, 12}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {12, 12, 12}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 8, /* MMX or SSE register to integer */
1647 8, /* size of l1 cache. */
1648 1024, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 8, /* number of parallel prefetches */
1651 1, /* Branch cost */
1652 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1658 nocona_memcpy,
1659 nocona_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1671 };
1673 static stringop_algs atom_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs atom_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs atom_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (4), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 atom_memcpy,
1736 atom_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
1748 };
1750 static stringop_algs slm_memcpy[2] = {
1751 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1752 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1754 static stringop_algs slm_memset[2] = {
1755 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1756 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1757 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1758 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1759 static const
1760 struct processor_costs slm_cost = {
1761 COSTS_N_INSNS (1), /* cost of an add instruction */
1762 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1763 COSTS_N_INSNS (1), /* variable shift costs */
1764 COSTS_N_INSNS (1), /* constant shift costs */
1765 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1766 COSTS_N_INSNS (3), /* HI */
1767 COSTS_N_INSNS (3), /* SI */
1768 COSTS_N_INSNS (4), /* DI */
1769 COSTS_N_INSNS (2)}, /* other */
1770 0, /* cost of multiply per each bit set */
1771 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1772 COSTS_N_INSNS (26), /* HI */
1773 COSTS_N_INSNS (42), /* SI */
1774 COSTS_N_INSNS (74), /* DI */
1775 COSTS_N_INSNS (74)}, /* other */
1776 COSTS_N_INSNS (1), /* cost of movsx */
1777 COSTS_N_INSNS (1), /* cost of movzx */
1778 8, /* "large" insn */
1779 17, /* MOVE_RATIO */
1780 4, /* cost for loading QImode using movzbl */
1781 {4, 4, 4}, /* cost of loading integer registers
1782 in QImode, HImode and SImode.
1783 Relative to reg-reg move (2). */
1784 {4, 4, 4}, /* cost of storing integer registers */
1785 4, /* cost of reg,reg fld/fst */
1786 {12, 12, 12}, /* cost of loading fp registers
1787 in SFmode, DFmode and XFmode */
1788 {6, 6, 8}, /* cost of storing fp registers
1789 in SFmode, DFmode and XFmode */
1790 2, /* cost of moving MMX register */
1791 {8, 8}, /* cost of loading MMX registers
1792 in SImode and DImode */
1793 {8, 8}, /* cost of storing MMX registers
1794 in SImode and DImode */
1795 2, /* cost of moving SSE register */
1796 {8, 8, 8}, /* cost of loading SSE registers
1797 in SImode, DImode and TImode */
1798 {8, 8, 8}, /* cost of storing SSE registers
1799 in SImode, DImode and TImode */
1800 5, /* MMX or SSE register to integer */
1801 32, /* size of l1 cache. */
1802 256, /* size of l2 cache. */
1803 64, /* size of prefetch block */
1804 6, /* number of parallel prefetches */
1805 3, /* Branch cost */
1806 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1807 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1808 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1809 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1810 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1811 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1812 slm_memcpy,
1813 slm_memset,
1814 1, /* scalar_stmt_cost. */
1815 1, /* scalar load_cost. */
1816 1, /* scalar_store_cost. */
1817 1, /* vec_stmt_cost. */
1818 4, /* vec_to_scalar_cost. */
1819 1, /* scalar_to_vec_cost. */
1820 1, /* vec_align_load_cost. */
1821 2, /* vec_unalign_load_cost. */
1822 1, /* vec_store_cost. */
1823 3, /* cond_taken_branch_cost. */
1824 1, /* cond_not_taken_branch_cost. */
1825 };
1827 static stringop_algs intel_memcpy[2] = {
1828 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1829 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1830 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1831 static stringop_algs intel_memset[2] = {
1832 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1833 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1834 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1835 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1836 static const
1837 struct processor_costs intel_cost = {
1838 COSTS_N_INSNS (1), /* cost of an add instruction */
1839 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1840 COSTS_N_INSNS (1), /* variable shift costs */
1841 COSTS_N_INSNS (1), /* constant shift costs */
1842 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1843 COSTS_N_INSNS (3), /* HI */
1844 COSTS_N_INSNS (3), /* SI */
1845 COSTS_N_INSNS (4), /* DI */
1846 COSTS_N_INSNS (2)}, /* other */
1847 0, /* cost of multiply per each bit set */
1848 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1849 COSTS_N_INSNS (26), /* HI */
1850 COSTS_N_INSNS (42), /* SI */
1851 COSTS_N_INSNS (74), /* DI */
1852 COSTS_N_INSNS (74)}, /* other */
1853 COSTS_N_INSNS (1), /* cost of movsx */
1854 COSTS_N_INSNS (1), /* cost of movzx */
1855 8, /* "large" insn */
1856 17, /* MOVE_RATIO */
1857 4, /* cost for loading QImode using movzbl */
1858 {4, 4, 4}, /* cost of loading integer registers
1859 in QImode, HImode and SImode.
1860 Relative to reg-reg move (2). */
1861 {4, 4, 4}, /* cost of storing integer registers */
1862 4, /* cost of reg,reg fld/fst */
1863 {12, 12, 12}, /* cost of loading fp registers
1864 in SFmode, DFmode and XFmode */
1865 {6, 6, 8}, /* cost of storing fp registers
1866 in SFmode, DFmode and XFmode */
1867 2, /* cost of moving MMX register */
1868 {8, 8}, /* cost of loading MMX registers
1869 in SImode and DImode */
1870 {8, 8}, /* cost of storing MMX registers
1871 in SImode and DImode */
1872 2, /* cost of moving SSE register */
1873 {8, 8, 8}, /* cost of loading SSE registers
1874 in SImode, DImode and TImode */
1875 {8, 8, 8}, /* cost of storing SSE registers
1876 in SImode, DImode and TImode */
1877 5, /* MMX or SSE register to integer */
1878 32, /* size of l1 cache. */
1879 256, /* size of l2 cache. */
1880 64, /* size of prefetch block */
1881 6, /* number of parallel prefetches */
1882 3, /* Branch cost */
1883 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1884 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1885 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1886 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1887 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1888 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1889 intel_memcpy,
1890 intel_memset,
1891 1, /* scalar_stmt_cost. */
1892 1, /* scalar load_cost. */
1893 1, /* scalar_store_cost. */
1894 1, /* vec_stmt_cost. */
1895 4, /* vec_to_scalar_cost. */
1896 1, /* scalar_to_vec_cost. */
1897 1, /* vec_align_load_cost. */
1898 2, /* vec_unalign_load_cost. */
1899 1, /* vec_store_cost. */
1900 3, /* cond_taken_branch_cost. */
1901 1, /* cond_not_taken_branch_cost. */
1902 };
1904 /* Generic should produce code tuned for Core-i7 (and newer chips)
1905 and btver1 (and newer chips). */
1907 static stringop_algs generic_memcpy[2] = {
1908 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1909 {-1, libcall, false}}},
1910 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1911 {-1, libcall, false}}}};
1912 static stringop_algs generic_memset[2] = {
1913 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1914 {-1, libcall, false}}},
1915 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1916 {-1, libcall, false}}}};
1917 static const
1918 struct processor_costs generic_cost = {
1919 COSTS_N_INSNS (1), /* cost of an add instruction */
1920 /* On all chips taken into consideration, lea is 2 cycles or more. With
1921 this cost, however, our current implementation of synth_mult results in
1922 the use of unnecessary temporary registers, causing regressions on several
1923 SPECfp benchmarks. */
1924 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1925 COSTS_N_INSNS (1), /* variable shift costs */
1926 COSTS_N_INSNS (1), /* constant shift costs */
1927 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1928 COSTS_N_INSNS (4), /* HI */
1929 COSTS_N_INSNS (3), /* SI */
1930 COSTS_N_INSNS (4), /* DI */
1931 COSTS_N_INSNS (2)}, /* other */
1932 0, /* cost of multiply per each bit set */
1933 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1934 COSTS_N_INSNS (26), /* HI */
1935 COSTS_N_INSNS (42), /* SI */
1936 COSTS_N_INSNS (74), /* DI */
1937 COSTS_N_INSNS (74)}, /* other */
1938 COSTS_N_INSNS (1), /* cost of movsx */
1939 COSTS_N_INSNS (1), /* cost of movzx */
1940 8, /* "large" insn */
1941 17, /* MOVE_RATIO */
1942 4, /* cost for loading QImode using movzbl */
1943 {4, 4, 4}, /* cost of loading integer registers
1944 in QImode, HImode and SImode.
1945 Relative to reg-reg move (2). */
1946 {4, 4, 4}, /* cost of storing integer registers */
1947 4, /* cost of reg,reg fld/fst */
1948 {12, 12, 12}, /* cost of loading fp registers
1949 in SFmode, DFmode and XFmode */
1950 {6, 6, 8}, /* cost of storing fp registers
1951 in SFmode, DFmode and XFmode */
1952 2, /* cost of moving MMX register */
1953 {8, 8}, /* cost of loading MMX registers
1954 in SImode and DImode */
1955 {8, 8}, /* cost of storing MMX registers
1956 in SImode and DImode */
1957 2, /* cost of moving SSE register */
1958 {8, 8, 8}, /* cost of loading SSE registers
1959 in SImode, DImode and TImode */
1960 {8, 8, 8}, /* cost of storing SSE registers
1961 in SImode, DImode and TImode */
1962 5, /* MMX or SSE register to integer */
1963 32, /* size of l1 cache. */
1964 512, /* size of l2 cache. */
1965 64, /* size of prefetch block */
1966 6, /* number of parallel prefetches */
1967 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1968 value is increased to the perhaps more appropriate value of 5. */
1969 3, /* Branch cost */
1970 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1971 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1972 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1973 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1974 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1975 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1976 generic_memcpy,
1977 generic_memset,
1978 1, /* scalar_stmt_cost. */
1979 1, /* scalar load_cost. */
1980 1, /* scalar_store_cost. */
1981 1, /* vec_stmt_cost. */
1982 1, /* vec_to_scalar_cost. */
1983 1, /* scalar_to_vec_cost. */
1984 1, /* vec_align_load_cost. */
1985 2, /* vec_unalign_load_cost. */
1986 1, /* vec_store_cost. */
1987 3, /* cond_taken_branch_cost. */
1988 1, /* cond_not_taken_branch_cost. */
1989 };
1991 /* core_cost should produce code tuned for the Core family of CPUs. */
1992 static stringop_algs core_memcpy[2] = {
1993 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1994 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1995 {-1, libcall, false}}}};
1996 static stringop_algs core_memset[2] = {
1997 {libcall, {{6, loop_1_byte, true},
1998 {24, loop, true},
1999 {8192, rep_prefix_4_byte, true},
2000 {-1, libcall, false}}},
2001 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2002 {-1, libcall, false}}}};
2004 static const
2005 struct processor_costs core_cost = {
2006 COSTS_N_INSNS (1), /* cost of an add instruction */
2007 /* On all chips taken into consideration, lea is 2 cycles or more. With
2008 this cost, however, our current implementation of synth_mult results in
2009 the use of unnecessary temporary registers, causing regressions on several
2010 SPECfp benchmarks. */
2011 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2012 COSTS_N_INSNS (1), /* variable shift costs */
2013 COSTS_N_INSNS (1), /* constant shift costs */
2014 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2015 COSTS_N_INSNS (4), /* HI */
2016 COSTS_N_INSNS (3), /* SI */
2017 COSTS_N_INSNS (4), /* DI */
2018 COSTS_N_INSNS (2)}, /* other */
2019 0, /* cost of multiply per each bit set */
2020 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2021 COSTS_N_INSNS (26), /* HI */
2022 COSTS_N_INSNS (42), /* SI */
2023 COSTS_N_INSNS (74), /* DI */
2024 COSTS_N_INSNS (74)}, /* other */
2025 COSTS_N_INSNS (1), /* cost of movsx */
2026 COSTS_N_INSNS (1), /* cost of movzx */
2027 8, /* "large" insn */
2028 17, /* MOVE_RATIO */
2029 4, /* cost for loading QImode using movzbl */
2030 {4, 4, 4}, /* cost of loading integer registers
2031 in QImode, HImode and SImode.
2032 Relative to reg-reg move (2). */
2033 {4, 4, 4}, /* cost of storing integer registers */
2034 4, /* cost of reg,reg fld/fst */
2035 {12, 12, 12}, /* cost of loading fp registers
2036 in SFmode, DFmode and XFmode */
2037 {6, 6, 8}, /* cost of storing fp registers
2038 in SFmode, DFmode and XFmode */
2039 2, /* cost of moving MMX register */
2040 {8, 8}, /* cost of loading MMX registers
2041 in SImode and DImode */
2042 {8, 8}, /* cost of storing MMX registers
2043 in SImode and DImode */
2044 2, /* cost of moving SSE register */
2045 {8, 8, 8}, /* cost of loading SSE registers
2046 in SImode, DImode and TImode */
2047 {8, 8, 8}, /* cost of storing SSE registers
2048 in SImode, DImode and TImode */
2049 5, /* MMX or SSE register to integer */
2050 64, /* size of l1 cache. */
2051 512, /* size of l2 cache. */
2052 64, /* size of prefetch block */
2053 6, /* number of parallel prefetches */
2054 /* FIXME perhaps more appropriate value is 5. */
2055 3, /* Branch cost */
2056 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2057 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2058 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2059 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2060 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2061 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2062 core_memcpy,
2063 core_memset,
2064 1, /* scalar_stmt_cost. */
2065 1, /* scalar load_cost. */
2066 1, /* scalar_store_cost. */
2067 1, /* vec_stmt_cost. */
2068 1, /* vec_to_scalar_cost. */
2069 1, /* scalar_to_vec_cost. */
2070 1, /* vec_align_load_cost. */
2071 2, /* vec_unalign_load_cost. */
2072 1, /* vec_store_cost. */
2073 3, /* cond_taken_branch_cost. */
2074 1, /* cond_not_taken_branch_cost. */
2075 };
2078 /* Set by -mtune. */
2079 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2081 /* Set by -mtune or -Os. */
2082 const struct processor_costs *ix86_cost = &pentium_cost;
2084 /* Processor feature/optimization bitmasks. */
2085 #define m_386 (1<<PROCESSOR_I386)
2086 #define m_486 (1<<PROCESSOR_I486)
2087 #define m_PENT (1<<PROCESSOR_PENTIUM)
2088 #define m_IAMCU (1<<PROCESSOR_IAMCU)
2089 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2090 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2091 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2092 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2093 #define m_CORE2 (1<<PROCESSOR_CORE2)
2094 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2095 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2096 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2097 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2098 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2099 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2100 #define m_KNL (1<<PROCESSOR_KNL)
2101 #define m_INTEL (1<<PROCESSOR_INTEL)
2103 #define m_GEODE (1<<PROCESSOR_GEODE)
2104 #define m_K6 (1<<PROCESSOR_K6)
2105 #define m_K6_GEODE (m_K6 | m_GEODE)
2106 #define m_K8 (1<<PROCESSOR_K8)
2107 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2108 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2109 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2110 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2111 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2112 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2113 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2114 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2115 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2116 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2117 #define m_BTVER (m_BTVER1 | m_BTVER2)
2118 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2120 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2122 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2123 #undef DEF_TUNE
2124 #define DEF_TUNE(tune, name, selector) name,
2125 #include "x86-tune.def"
2126 #undef DEF_TUNE
2127 };
2129 /* Feature tests against the various tunings. */
2130 unsigned char ix86_tune_features[X86_TUNE_LAST];
2132 /* Feature tests against the various tunings used to create ix86_tune_features
2133 based on the processor mask. */
2134 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2135 #undef DEF_TUNE
2136 #define DEF_TUNE(tune, name, selector) selector,
2137 #include "x86-tune.def"
2138 #undef DEF_TUNE
2139 };
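/* Illustrative sketch, not part of GCC: the two expansions above use
   the X-macro pattern -- each DEF_TUNE (tune, name, selector) entry in
   x86-tune.def is expanded once keeping NAME and once keeping
   SELECTOR, so the two arrays stay parallel by construction.  A
   self-contained miniature with hypothetical names:

     #define MINI_TUNES(X) X (T_A, "a", 0x1) X (T_B, "b", 0x6)

     #define PICK_NAME(tune, name, selector) name,
     static const char *mini_names[] = { MINI_TUNES (PICK_NAME) };
     #undef PICK_NAME

     #define PICK_SEL(tune, name, selector) selector,
     static unsigned mini_selectors[] = { MINI_TUNES (PICK_SEL) };
     #undef PICK_SEL  */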
2141 /* Feature tests against the various architecture variations. */
2142 unsigned char ix86_arch_features[X86_ARCH_LAST];
2144 /* Feature tests against the various architecture variations, used to create
2145 ix86_arch_features based on the processor mask. */
2146 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2147 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2148 ~(m_386 | m_486 | m_PENT | m_IAMCU | m_K6),
2150 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2151 ~m_386,
2153 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2154 ~(m_386 | m_486),
2156 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2157 ~m_386,
2159 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2160 ~m_386,
2161 };
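/* Illustrative sketch, not part of GCC: at option-override time each
   selector above collapses to a per-feature boolean for the processor
   being targeted.  Hypothetical helper; the real loop lives in
   ix86_option_override_internal.  */
static void ATTRIBUTE_UNUSED
example_init_arch_features (unsigned int arch_mask)
{
  for (int i = 0; i < X86_ARCH_LAST; i++)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & arch_mask);
}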
2163 /* If the average insn count for a single function invocation is
2164 lower than this constant, emit fast (but longer) prologue and
2165 epilogue code. */
2166 #define FAST_PROLOGUE_INSN_COUNT 20
2168 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2169 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2170 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2171 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2173 /* Array of the smallest class containing reg number REGNO, indexed by
2174 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2176 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2177 {
2178 /* ax, dx, cx, bx */
2179 AREG, DREG, CREG, BREG,
2180 /* si, di, bp, sp */
2181 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2182 /* FP registers */
2183 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2184 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2185 /* arg pointer */
2186 NON_Q_REGS,
2187 /* flags, fpsr, fpcr, frame */
2188 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2189 /* SSE registers */
2190 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2191 SSE_REGS, SSE_REGS,
2192 /* MMX registers */
2193 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2194 MMX_REGS, MMX_REGS,
2195 /* REX registers */
2196 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2197 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2198 /* SSE REX registers */
2199 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2200 SSE_REGS, SSE_REGS,
2201 /* AVX-512 SSE registers */
2202 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2203 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2204 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2205 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2206 /* Mask registers. */
2207 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2208 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2209 /* MPX bound registers */
2210 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2211 };
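/* Illustrative sketch, not part of GCC: REGNO_REG_CLASS in i386.h is
   essentially this lookup; hypothetical wrapper with a bounds check.  */
static enum reg_class ATTRIBUTE_UNUSED
example_regno_reg_class (unsigned int regno)
{
  gcc_assert (regno < FIRST_PSEUDO_REGISTER);
  return regclass_map[regno];
}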
2213 /* The "default" register map used in 32bit mode. */
2215 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2216 {
2217 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2218 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2219 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2220 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2221 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2222 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2223 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2224 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2226 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2227 101, 102, 103, 104, /* bound registers */
2228 };
2230 /* The "default" register map used in 64bit mode. */
2232 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2233 {
2234 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2235 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2236 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2237 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2238 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2239 8,9,10,11,12,13,14,15, /* extended integer registers */
2240 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2241 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2242 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2243 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2244 126, 127, 128, 129, /* bound registers */
2245 };
2247 /* Define the register numbers to be used in Dwarf debugging information.
2248 The SVR4 reference port C compiler uses the following register numbers
2249 in its Dwarf output code:
2250 0 for %eax (gcc regno = 0)
2251 1 for %ecx (gcc regno = 2)
2252 2 for %edx (gcc regno = 1)
2253 3 for %ebx (gcc regno = 3)
2254 4 for %esp (gcc regno = 7)
2255 5 for %ebp (gcc regno = 6)
2256 6 for %esi (gcc regno = 4)
2257 7 for %edi (gcc regno = 5)
2258 The following three DWARF register numbers are never generated by
2259 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2260 believes these numbers have these meanings.
2261 8 for %eip (no gcc equivalent)
2262 9 for %eflags (gcc regno = 17)
2263 10 for %trapno (no gcc equivalent)
2264 It is not at all clear how we should number the FP stack registers
2265 for the x86 architecture. If the version of SDB on x86/svr4 were
2266 a bit less brain dead with respect to floating-point then we would
2267 have a precedent to follow with respect to DWARF register numbers
2268 for x86 FP registers, but the SDB on x86/svr4 is so completely
2269 broken with respect to FP registers that it is hardly worth thinking
2270 of it as something to strive for compatibility with.
2271 The version of x86/svr4 SDB I have at the moment does (partially)
2272 seem to believe that DWARF register number 11 is associated with
2273 the x86 register %st(0), but that's about all. Higher DWARF
2274 register numbers don't seem to be associated with anything in
2275 particular, and even for DWARF regno 11, SDB only seems to under-
2276 stand that it should say that a variable lives in %st(0) (when
2277 asked via an `=' command) if we said it was in DWARF regno 11,
2278 but SDB still prints garbage when asked for the value of the
2279 variable in question (via a `/' command).
2280 (Also note that the labels SDB prints for various FP stack regs
2281 when doing an `x' command are all wrong.)
2282 Note that these problems generally don't affect the native SVR4
2283 C compiler because it doesn't allow the use of -O with -g and
2284 because when it is *not* optimizing, it allocates a memory
2285 location for each floating-point variable, and the memory
2286 location is what gets described in the DWARF AT_location
2287 attribute for the variable in question.
2288 Regardless of the severe mental illness of the x86/svr4 SDB, we
2289 do something sensible here and we use the following DWARF
2290 register numbers. Note that these are all stack-top-relative
2291 numbers.
2292 11 for %st(0) (gcc regno = 8)
2293 12 for %st(1) (gcc regno = 9)
2294 13 for %st(2) (gcc regno = 10)
2295 14 for %st(3) (gcc regno = 11)
2296 15 for %st(4) (gcc regno = 12)
2297 16 for %st(5) (gcc regno = 13)
2298 17 for %st(6) (gcc regno = 14)
2299 18 for %st(7) (gcc regno = 15)
2301 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2302 {
2303 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2304 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2305 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2306 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2307 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2308 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2309 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2310 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2311 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2312 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2313 101, 102, 103, 104, /* bound registers */
2314 };
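/* Illustrative sketch, not part of GCC: a lookup in the SVR4 DWARF
   numbering documented above, e.g. %edx (gcc regno 1) yields DWARF
   register 2, and -1 marks registers with no DWARF number.
   Hypothetical helper.  */
static int ATTRIBUTE_UNUSED
example_svr4_dwarf_regno (unsigned int regno)
{
  gcc_assert (regno < FIRST_PSEUDO_REGISTER);
  return svr4_dbx_register_map[regno];
}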
2316 /* Define parameter passing and return registers. */
2318 static int const x86_64_int_parameter_registers[6] =
2319 {
2320 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2321 };
2323 static int const x86_64_ms_abi_int_parameter_registers[4] =
2324 {
2325 CX_REG, DX_REG, R8_REG, R9_REG
2326 };
2328 static int const x86_64_int_return_registers[4] =
2329 {
2330 AX_REG, DX_REG, DI_REG, SI_REG
2331 };
2333 /* Additional registers that are clobbered by SYSV calls. */
2335 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2336 {
2337 SI_REG, DI_REG,
2338 XMM6_REG, XMM7_REG,
2339 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2340 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2341 };
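/* Illustrative sketch, not part of GCC: which register receives the
   Nth integer argument under each 64-bit calling convention, reading
   the tables above; -1 stands in for "passed on the stack".
   Hypothetical helper.  */
static int ATTRIBUTE_UNUSED
example_nth_int_arg_reg (int n, bool ms_abi)
{
  if (ms_abi)
    return n < 4 ? x86_64_ms_abi_int_parameter_registers[n] : -1;
  return n < 6 ? x86_64_int_parameter_registers[n] : -1;
}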
2343 /* Define the structure for the machine field in struct function. */
2345 struct GTY(()) stack_local_entry {
2346 unsigned short mode;
2347 unsigned short n;
2348 rtx rtl;
2349 struct stack_local_entry *next;
2350 };
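/* Illustrative sketch, not part of GCC: assign_386_stack_local (later
   in this file) caches one stack slot per (mode, n) pair by walking a
   per-function list of these entries, roughly

     for (s = list_head; s; s = s->next)
       if (s->mode == mode && s->n == n)
	 return validize_mem (copy_rtx (s->rtl));

   where LIST_HEAD names the per-function list only for this sketch.  */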
2352 /* Structure describing stack frame layout.
2353 Stack grows downward:
2355 [arguments]
2356 <- ARG_POINTER
2357 saved pc
2359 saved static chain if ix86_static_chain_on_stack
2361 saved frame pointer if frame_pointer_needed
2362 <- HARD_FRAME_POINTER
2363 [saved regs]
2364 <- regs_save_offset
2365 [padding0]
2367 [saved SSE regs]
2368 <- sse_regs_save_offset
2369 [padding1] |
2370 | <- FRAME_POINTER
2371 [va_arg registers] |
2373 [frame] |
2375 [padding2] | = to_allocate
2376 <- STACK_POINTER
2378 struct ix86_frame
2379 {
2380 int nsseregs;
2381 int nregs;
2382 int va_arg_size;
2383 int red_zone_size;
2384 int outgoing_arguments_size;
2386 /* The offsets relative to ARG_POINTER. */
2387 HOST_WIDE_INT frame_pointer_offset;
2388 HOST_WIDE_INT hard_frame_pointer_offset;
2389 HOST_WIDE_INT stack_pointer_offset;
2390 HOST_WIDE_INT hfp_save_offset;
2391 HOST_WIDE_INT reg_save_offset;
2392 HOST_WIDE_INT sse_reg_save_offset;
2394 /* When save_regs_using_mov is set, emit prologue using
2395 move instead of push instructions. */
2396 bool save_regs_using_mov;
2397 };
2399 /* The CPU we are scheduling for. */
2400 enum attr_cpu ix86_schedule;
2402 /* The CPU we are optimizing for. */
2403 enum processor_type ix86_tune;
2405 /* Which instruction set architecture to use. */
2406 enum processor_type ix86_arch;
2408 /* True if processor has SSE prefetch instruction. */
2409 unsigned char x86_prefetch_sse;
2411 /* -mstackrealign option */
2412 static const char ix86_force_align_arg_pointer_string[]
2413 = "force_align_arg_pointer";
2415 static rtx (*ix86_gen_leave) (void);
2416 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2417 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2418 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2419 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2420 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2421 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2422 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2423 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2424 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2425 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2426 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2427 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2429 /* Preferred alignment for stack boundary in bits. */
2430 unsigned int ix86_preferred_stack_boundary;
2432 /* Alignment for incoming stack boundary in bits specified at
2433 command line. */
2434 static unsigned int ix86_user_incoming_stack_boundary;
2436 /* Default alignment for incoming stack boundary in bits. */
2437 static unsigned int ix86_default_incoming_stack_boundary;
2439 /* Alignment for incoming stack boundary in bits. */
2440 unsigned int ix86_incoming_stack_boundary;
2442 /* Calling-ABI-specific va_list type nodes. */
2443 static GTY(()) tree sysv_va_list_type_node;
2444 static GTY(()) tree ms_va_list_type_node;
2446 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2447 char internal_label_prefix[16];
2448 int internal_label_prefix_len;
2450 /* Fence to use after loop using movnt. */
2451 tree x86_mfence;
2453 /* Register class used for passing a given 64-bit part of an argument.
2454 These represent classes as documented by the psABI, with the exception
2455 of the SSESF and SSEDF classes, which are basically the SSE class, except
2456 that GCC will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2458 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2459 whenever possible (the upper half then contains only padding). */
2460 enum x86_64_reg_class
2461 {
2462 X86_64_NO_CLASS,
2463 X86_64_INTEGER_CLASS,
2464 X86_64_INTEGERSI_CLASS,
2465 X86_64_SSE_CLASS,
2466 X86_64_SSESF_CLASS,
2467 X86_64_SSEDF_CLASS,
2468 X86_64_SSEUP_CLASS,
2469 X86_64_X87_CLASS,
2470 X86_64_X87UP_CLASS,
2471 X86_64_COMPLEX_X87_CLASS,
2472 X86_64_MEMORY_CLASS
2473 };
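/* Illustrative examples, not normative: under the classification
   above, struct { long a; long b; } classifies as two INTEGER words
   and is passed in two general registers; struct { double a; double b; }
   classifies as two SSE words (SSEDF here) and is passed in two SSE
   registers; anything classified MEMORY is passed on the stack.  */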
2475 #define MAX_CLASSES 8
2477 /* Table of constants used by fldpi, fldln2, etc. */
2478 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2479 static bool ext_80387_constants_init = 0;
2482 static struct machine_function * ix86_init_machine_status (void);
2483 static rtx ix86_function_value (const_tree, const_tree, bool);
2484 static bool ix86_function_value_regno_p (const unsigned int);
2485 static unsigned int ix86_function_arg_boundary (machine_mode,
2486 const_tree);
2487 static rtx ix86_static_chain (const_tree, bool);
2488 static int ix86_function_regparm (const_tree, const_tree);
2489 static void ix86_compute_frame_layout (struct ix86_frame *);
2490 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2491 rtx, rtx, int);
2492 static void ix86_add_new_builtins (HOST_WIDE_INT);
2493 static tree ix86_canonical_va_list_type (tree);
2494 static void predict_jump (int);
2495 static unsigned int split_stack_prologue_scratch_regno (void);
2496 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2498 enum ix86_function_specific_strings
2499 {
2500 IX86_FUNCTION_SPECIFIC_ARCH,
2501 IX86_FUNCTION_SPECIFIC_TUNE,
2502 IX86_FUNCTION_SPECIFIC_MAX
2503 };
2505 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2506 const char *, enum fpmath_unit, bool);
2507 static void ix86_function_specific_save (struct cl_target_option *,
2508 struct gcc_options *opts);
2509 static void ix86_function_specific_restore (struct gcc_options *opts,
2510 struct cl_target_option *);
2511 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2512 static void ix86_function_specific_print (FILE *, int,
2513 struct cl_target_option *);
2514 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2515 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2516 struct gcc_options *,
2517 struct gcc_options *,
2518 struct gcc_options *);
2519 static bool ix86_can_inline_p (tree, tree);
2520 static void ix86_set_current_function (tree);
2521 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2523 static enum calling_abi ix86_function_abi (const_tree);
2526 #ifndef SUBTARGET32_DEFAULT_CPU
2527 #define SUBTARGET32_DEFAULT_CPU "i386"
2528 #endif
2530 /* Whether -mtune= or -march= were specified */
2531 static int ix86_tune_defaulted;
2532 static int ix86_arch_specified;
2534 /* Vectorization library interface and handlers. */
2535 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2537 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2538 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2540 /* Processor target table, indexed by processor number */
2541 struct ptt
2542 {
2543 const char *const name; /* processor name */
2544 const struct processor_costs *cost; /* Processor costs */
2545 const int align_loop; /* Default alignments. */
2546 const int align_loop_max_skip;
2547 const int align_jump;
2548 const int align_jump_max_skip;
2549 const int align_func;
2550 };
2552 /* This table must be in sync with enum processor_type in i386.h. */
2553 static const struct ptt processor_target_table[PROCESSOR_max] =
2554 {
2555 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2556 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2557 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2558 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2559 {"iamcu", &iamcu_cost, 16, 7, 16, 7, 16},
2560 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2561 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2562 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2563 {"core2", &core_cost, 16, 10, 16, 10, 16},
2564 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2565 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2566 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2567 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2568 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2569 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2570 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2571 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2572 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2573 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2574 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2575 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2576 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2577 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2578 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2579 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2580 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2581 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2584 static unsigned int
2585 rest_of_handle_insert_vzeroupper (void)
2586 {
2587 int i;
2589 /* vzeroupper instructions are inserted immediately after reload to
2590 account for possible spills from 256-bit registers. The pass
2591 reuses the mode switching infrastructure by re-running the mode
2592 insertion pass, so disable entities that have already been processed. */
2593 for (i = 0; i < MAX_386_ENTITIES; i++)
2594 ix86_optimize_mode_switching[i] = 0;
2596 ix86_optimize_mode_switching[AVX_U128] = 1;
2598 /* Call optimize_mode_switching. */
2599 g->get_passes ()->execute_pass_mode_switching ();
2600 return 0;
2601 }
2603 namespace {
2605 const pass_data pass_data_insert_vzeroupper =
2606 {
2607 RTL_PASS, /* type */
2608 "vzeroupper", /* name */
2609 OPTGROUP_NONE, /* optinfo_flags */
2610 TV_NONE, /* tv_id */
2611 0, /* properties_required */
2612 0, /* properties_provided */
2613 0, /* properties_destroyed */
2614 0, /* todo_flags_start */
2615 TODO_df_finish, /* todo_flags_finish */
2616 };
2618 class pass_insert_vzeroupper : public rtl_opt_pass
2619 {
2620 public:
2621 pass_insert_vzeroupper(gcc::context *ctxt)
2622 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2623 {}
2625 /* opt_pass methods: */
2626 virtual bool gate (function *)
2627 {
2628 return TARGET_AVX && !TARGET_AVX512F
2629 && TARGET_VZEROUPPER && flag_expensive_optimizations
2630 && !optimize_size;
2633 virtual unsigned int execute (function *)
2634 {
2635 return rest_of_handle_insert_vzeroupper ();
2636 }
2638 }; // class pass_insert_vzeroupper
2640 } // anon namespace
2642 rtl_opt_pass *
2643 make_pass_insert_vzeroupper (gcc::context *ctxt)
2644 {
2645 return new pass_insert_vzeroupper (ctxt);
2646 }
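/* Illustrative sketch, not part of the code at this point: the target
   hooks this pass in right after reload, roughly

     struct register_pass_info insert_vzeroupper_info
       = { make_pass_insert_vzeroupper (g), "reload",
	   1, PASS_POS_INSERT_AFTER };
     register_pass (&insert_vzeroupper_info);

   (the real registration lives in ix86_option_override, later in this
   file).  */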
2648 /* Return true if a red-zone is in use. */
2650 static inline bool
2651 ix86_using_red_zone (void)
2652 {
2653 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2654 }
2656 /* Return a string that documents the current -m options. The caller is
2657 responsible for freeing the string. */
2659 static char *
2660 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2661 const char *tune, enum fpmath_unit fpmath,
2662 bool add_nl_p)
2663 {
2664 struct ix86_target_opts
2665 {
2666 const char *option; /* option string */
2667 HOST_WIDE_INT mask; /* isa mask options */
2668 };
2670 /* This table is ordered so that options like -msse4.2, which imply
2671 preceding options, are matched first. */
2672 static struct ix86_target_opts isa_opts[] =
2673 {
2674 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2675 { "-mfma", OPTION_MASK_ISA_FMA },
2676 { "-mxop", OPTION_MASK_ISA_XOP },
2677 { "-mlwp", OPTION_MASK_ISA_LWP },
2678 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2679 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2680 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2681 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2682 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2683 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2684 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2685 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2686 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2687 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2688 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2689 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2690 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2691 { "-msse3", OPTION_MASK_ISA_SSE3 },
2692 { "-msse2", OPTION_MASK_ISA_SSE2 },
2693 { "-msse", OPTION_MASK_ISA_SSE },
2694 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2695 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2696 { "-mmmx", OPTION_MASK_ISA_MMX },
2697 { "-mabm", OPTION_MASK_ISA_ABM },
2698 { "-mbmi", OPTION_MASK_ISA_BMI },
2699 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2700 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2701 { "-mhle", OPTION_MASK_ISA_HLE },
2702 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2703 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2704 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2705 { "-madx", OPTION_MASK_ISA_ADX },
2706 { "-mtbm", OPTION_MASK_ISA_TBM },
2707 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2708 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2709 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2710 { "-maes", OPTION_MASK_ISA_AES },
2711 { "-msha", OPTION_MASK_ISA_SHA },
2712 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2713 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2714 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2715 { "-mf16c", OPTION_MASK_ISA_F16C },
2716 { "-mrtm", OPTION_MASK_ISA_RTM },
2717 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2718 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2719 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2720 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2721 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2722 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2723 { "-mmpx", OPTION_MASK_ISA_MPX },
2724 { "-mclwb", OPTION_MASK_ISA_CLWB },
2725 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2726 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2729 /* Flag options. */
2730 static struct ix86_target_opts flag_opts[] =
2731 {
2732 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2733 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2734 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2735 { "-m80387", MASK_80387 },
2736 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2737 { "-malign-double", MASK_ALIGN_DOUBLE },
2738 { "-mcld", MASK_CLD },
2739 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2740 { "-mieee-fp", MASK_IEEE_FP },
2741 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2742 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2743 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2744 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2745 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2746 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2747 { "-mno-red-zone", MASK_NO_RED_ZONE },
2748 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2749 { "-mrecip", MASK_RECIP },
2750 { "-mrtd", MASK_RTD },
2751 { "-msseregparm", MASK_SSEREGPARM },
2752 { "-mstack-arg-probe", MASK_STACK_PROBE },
2753 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2754 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2755 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2756 { "-mvzeroupper", MASK_VZEROUPPER },
2757 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2758 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2759 { "-mprefer-avx128", MASK_PREFER_AVX128},
2762 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2764 char isa_other[40];
2765 char target_other[40];
2766 unsigned num = 0;
2767 unsigned i, j;
2768 char *ret;
2769 char *ptr;
2770 size_t len;
2771 size_t line_len;
2772 size_t sep_len;
2773 const char *abi;
2775 memset (opts, '\0', sizeof (opts));
2777 /* Add -march= option. */
2778 if (arch)
2779 {
2780 opts[num][0] = "-march=";
2781 opts[num++][1] = arch;
2782 }
2784 /* Add -mtune= option. */
2785 if (tune)
2786 {
2787 opts[num][0] = "-mtune=";
2788 opts[num++][1] = tune;
2789 }
2791 /* Add -m32/-m64/-mx32. */
2792 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2794 if ((isa & OPTION_MASK_ABI_64) != 0)
2795 abi = "-m64";
2796 else
2797 abi = "-mx32";
2798 isa &= ~ (OPTION_MASK_ISA_64BIT
2799 | OPTION_MASK_ABI_64
2800 | OPTION_MASK_ABI_X32);
2801 }
2802 else
2803 abi = "-m32";
2804 opts[num++][0] = abi;
2806 /* Pick out the options in isa options. */
2807 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2808 {
2809 if ((isa & isa_opts[i].mask) != 0)
2811 opts[num++][0] = isa_opts[i].option;
2812 isa &= ~ isa_opts[i].mask;
2816 if (isa && add_nl_p)
2818 opts[num++][0] = isa_other;
2819 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2820 isa);
2821 }
2823 /* Add flag options. */
2824 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2825 {
2826 if ((flags & flag_opts[i].mask) != 0)
2828 opts[num++][0] = flag_opts[i].option;
2829 flags &= ~ flag_opts[i].mask;
2833 if (flags && add_nl_p)
2835 opts[num++][0] = target_other;
2836 sprintf (target_other, "(other flags: %#x)", flags);
2837 }
2839 /* Add -fpmath= option. */
2840 if (fpmath)
2841 {
2842 opts[num][0] = "-mfpmath=";
2843 switch ((int) fpmath)
2844 {
2845 case FPMATH_387:
2846 opts[num++][1] = "387";
2847 break;
2849 case FPMATH_SSE:
2850 opts[num++][1] = "sse";
2851 break;
2853 case FPMATH_387 | FPMATH_SSE:
2854 opts[num++][1] = "sse+387";
2855 break;
2857 default:
2858 gcc_unreachable ();
2859 }
2860 }
2862 /* Any options? */
2863 if (num == 0)
2864 return NULL;
2866 gcc_assert (num < ARRAY_SIZE (opts));
2868 /* Size the string. */
2869 len = 0;
2870 sep_len = (add_nl_p) ? 3 : 1;
2871 for (i = 0; i < num; i++)
2873 len += sep_len;
2874 for (j = 0; j < 2; j++)
2875 if (opts[i][j])
2876 len += strlen (opts[i][j]);
2877 }
2879 /* Build the string. */
2880 ret = ptr = (char *) xmalloc (len);
2881 line_len = 0;
2883 for (i = 0; i < num; i++)
2885 size_t len2[2];
2887 for (j = 0; j < 2; j++)
2888 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2890 if (i != 0)
2891 {
2892 *ptr++ = ' ';
2893 line_len++;
2895 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2897 *ptr++ = '\\';
2898 *ptr++ = '\n';
2899 line_len = 0;
2900 }
2901 }
2903 for (j = 0; j < 2; j++)
2904 if (opts[i][j])
2905 {
2906 memcpy (ptr, opts[i][j], len2[j]);
2907 ptr += len2[j];
2908 line_len += len2[j];
2909 }
2910 }
2912 *ptr = '\0';
2913 gcc_assert (ret + len >= ptr);
2915 return ret;
2916 }
2918 /* Return true if profiling code should be emitted before the
2919 prologue; otherwise return false. For x86 this is the case when
2920 "hotfix"-style -mfentry profiling is in use. */
2921 static bool
2922 ix86_profile_before_prologue (void)
2923 {
2924 return flag_fentry != 0;
2925 }
2927 /* Function that is callable from the debugger to print the current
2928 options. */
2929 void ATTRIBUTE_UNUSED
2930 ix86_debug_options (void)
2931 {
2932 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2933 ix86_arch_string, ix86_tune_string,
2934 ix86_fpmath, true);
2936 if (opts)
2937 {
2938 fprintf (stderr, "%s\n\n", opts);
2939 free (opts);
2940 }
2941 else
2942 fputs ("<no options>\n\n", stderr);
2944 return;
2945 }
2947 static const char *stringop_alg_names[] = {
2948 #define DEF_ENUM
2949 #define DEF_ALG(alg, name) #name,
2950 #include "stringop.def"
2951 #undef DEF_ENUM
2952 #undef DEF_ALG
2953 };
2955 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2956 The string is of the following form (or a comma-separated list of such triples):
2958 strategy_alg:max_size:[align|noalign]
2960 where the full size range for the strategy is either [0, max_size] or
2961 [min_size, max_size], in which min_size is the max_size + 1 of the
2962 preceding range. The last size range must have max_size == -1.
2964 Examples:
2967 -mmemcpy-strategy=libcall:-1:noalign
2969 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2973 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2975 This tells the compiler to use the following strategy for memset:
2976 1) when the expected size is between [1, 16], use rep_8byte strategy;
2977 2) when the size is between [17, 2048], use vector_loop;
2978 3) when the size is > 2048, use libcall. */
2980 struct stringop_size_range
2981 {
2982 int max;
2983 stringop_alg alg;
2984 bool noalign;
2987 static void
2988 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2990 const struct stringop_algs *default_algs;
2991 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2992 char *curr_range_str, *next_range_str;
2993 int i = 0, n = 0;
2995 if (is_memset)
2996 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2997 else
2998 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3000 curr_range_str = strategy_str;
3004 int maxs;
3005 char alg_name[128];
3006 char align[16];
3007 next_range_str = strchr (curr_range_str, ',');
3008 if (next_range_str)
3009 *next_range_str++ = '\0';
3011 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
3012 alg_name, &maxs, align))
3014 error ("wrong arg %s to option %s", curr_range_str,
3015 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3016 return;
3019 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3021 error ("size ranges of option %s should be increasing",
3022 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3023 return;
3026 for (i = 0; i < last_alg; i++)
3027 if (!strcmp (alg_name, stringop_alg_names[i]))
3028 break;
3030 if (i == last_alg)
3032 error ("wrong stringop strategy name %s specified for option %s",
3033 alg_name,
3034 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3035 return;
3038 if ((stringop_alg) i == rep_prefix_8_byte
3039 && !TARGET_64BIT)
3041 /* rep; movq isn't available in 32-bit code. */
3042 error ("stringop strategy name %s specified for option %s "
3043 "not supported for 32-bit code",
3044 alg_name,
3045 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3046 return;
if (n >= MAX_STRINGOP_ALGS)
error ("too many size ranges specified in option %s",
is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
return;
3049 input_ranges[n].max = maxs;
3050 input_ranges[n].alg = (stringop_alg) i;
3051 if (!strcmp (align, "align"))
3052 input_ranges[n].noalign = false;
3053 else if (!strcmp (align, "noalign"))
3054 input_ranges[n].noalign = true;
3055 else
3057 error ("unknown alignment %s specified for option %s",
3058 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3059 return;
3061 n++;
3062 curr_range_str = next_range_str;
3064 while (curr_range_str);
3066 if (input_ranges[n - 1].max != -1)
3068 error ("the max value for the last size range should be -1"
3069 " for option %s",
3070 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3071 return;
3081 /* Now override the default algs array. */
3082 for (i = 0; i < n; i++)
3084 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3085 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3086 = input_ranges[i].alg;
3087 *const_cast<int *>(&default_algs->size[i].noalign)
3088 = input_ranges[i].noalign;
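/* Worked example (editorial): with the syntax documented above, on a
   64-bit target

       -mmemcpy-strategy=rep_8byte:16:noalign,libcall:-1:noalign

   parses into two entries: {max = 16, alg = rep_prefix_8_byte,
   noalign = true} covering sizes [0, 16], and {max = -1, alg = libcall,
   noalign = true} covering all larger sizes; both then overwrite the
   default algs array.  */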
3093 /* Parse the -mtune-ctrl= option. When DUMP is true,
3094 print the features that are explicitly set. */
3096 static void
3097 parse_mtune_ctrl_str (bool dump)
3099 if (!ix86_tune_ctrl_string)
3100 return;
3102 char *next_feature_string = NULL;
3103 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3104 char *orig = curr_feature_string;
3105 int i;
3108 bool clear = false;
3110 next_feature_string = strchr (curr_feature_string, ',');
3111 if (next_feature_string)
3112 *next_feature_string++ = '\0';
3113 if (*curr_feature_string == '^')
3115 curr_feature_string++;
3116 clear = true;
3118 for (i = 0; i < X86_TUNE_LAST; i++)
3120 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3122 ix86_tune_features[i] = !clear;
3123 if (dump)
3124 fprintf (stderr, "Explicitly %s feature %s\n",
3125 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3126 break;
3129 if (i == X86_TUNE_LAST)
3130 error ("Unknown parameter to option -mtune-ctrl: %s",
3131 clear ? curr_feature_string - 1 : curr_feature_string);
3132 curr_feature_string = next_feature_string;
3134 while (curr_feature_string);
3135 free (orig);
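/* Usage sketch (editorial): features are comma separated and a '^' prefix
   clears instead of sets, so something like

       -mtune-ctrl=use_leave,^avoid_mem_opnd_for_cmove

   would set the first feature and clear the second, assuming both names
   appear in ix86_tune_feature_names.  */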
3138 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3139 processor type. */
3141 static void
3142 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3144 unsigned int ix86_tune_mask = 1u << ix86_tune;
3145 int i;
3147 for (i = 0; i < X86_TUNE_LAST; ++i)
3149 if (ix86_tune_no_default)
3150 ix86_tune_features[i] = 0;
3151 else
3152 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3155 if (dump)
3157 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3158 for (i = 0; i < X86_TUNE_LAST; i++)
3159 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3160 ix86_tune_features[i] ? "on" : "off");
3163 parse_mtune_ctrl_str (dump);
3167 /* Default align_* from the processor table. */
3169 static void
3170 ix86_default_align (struct gcc_options *opts)
3172 if (opts->x_align_loops == 0)
3174 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3175 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3177 if (opts->x_align_jumps == 0)
3179 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3180 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3182 if (opts->x_align_functions == 0)
3184 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3188 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3190 static void
3191 ix86_override_options_after_change (void)
3193 ix86_default_align (&global_options);
3196 /* Override various settings based on options. If MAIN_ARGS_P, the
3197 options are from the command line, otherwise they are from
3198 attributes. */
3200 static void
3201 ix86_option_override_internal (bool main_args_p,
3202 struct gcc_options *opts,
3203 struct gcc_options *opts_set)
3205 int i;
3206 unsigned int ix86_arch_mask;
3207 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3208 const char *prefix;
3209 const char *suffix;
3210 const char *sw;
3212 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3213 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3214 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3215 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3216 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3217 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3218 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3219 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3220 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3221 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3222 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3223 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3224 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3225 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3226 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3227 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3228 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3229 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3230 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3231 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3232 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3233 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3234 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3235 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3236 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3237 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3238 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3239 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3240 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3241 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3242 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3243 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3244 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3245 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3246 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3247 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3248 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3249 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3250 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3251 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3252 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3253 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3254 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3255 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3256 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3257 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3258 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3259 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3260 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3261 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3262 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3263 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3264 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3265 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3266 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3267 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3268 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3269 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
3271 #define PTA_CORE2 \
3272 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3273 | PTA_CX16 | PTA_FXSR)
3274 #define PTA_NEHALEM \
3275 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3276 #define PTA_WESTMERE \
3277 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3278 #define PTA_SANDYBRIDGE \
3279 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3280 #define PTA_IVYBRIDGE \
3281 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3282 #define PTA_HASWELL \
3283 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3284 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3285 #define PTA_BROADWELL \
3286 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3287 #define PTA_SKYLAKE \
3288 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
3289 #define PTA_KNL \
3290 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3291 #define PTA_BONNELL \
3292 (PTA_CORE2 | PTA_MOVBE)
3293 #define PTA_SILVERMONT \
3294 (PTA_WESTMERE | PTA_MOVBE)
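/* Expansion example (editorial): each core inherits its predecessor, so
   PTA_WESTMERE unfolds to
   PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT | PTA_AES | PTA_PCLMUL,
   i.e. the full Nehalem set plus AES and PCLMUL.  */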
3296 /* If this reaches 64, the flags field of struct pta below must be widened.  */
3298 static struct pta
3300 const char *const name; /* processor name or nickname. */
3301 const enum processor_type processor;
3302 const enum attr_cpu schedule;
3303 const unsigned HOST_WIDE_INT flags;
3305 const processor_alias_table[] =
3307 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3308 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3309 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3310 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3311 {"iamcu", PROCESSOR_IAMCU, CPU_PENTIUM, 0},
3312 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3313 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3314 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3315 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3316 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3317 PTA_MMX | PTA_SSE | PTA_FXSR},
3318 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3319 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3320 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3321 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3322 PTA_MMX | PTA_SSE | PTA_FXSR},
3323 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3324 PTA_MMX | PTA_SSE | PTA_FXSR},
3325 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3326 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3327 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3328 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3329 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3330 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3331 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3332 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3333 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3334 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3335 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3336 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3337 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3338 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3339 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3340 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3341 PTA_SANDYBRIDGE},
3342 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3343 PTA_SANDYBRIDGE},
3344 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3345 PTA_IVYBRIDGE},
3346 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3347 PTA_IVYBRIDGE},
3348 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3349 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3350 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3351 {"skylake", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_SKYLAKE},
3352 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3353 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3354 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3355 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3356 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3357 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3358 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3359 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3360 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3361 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3362 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3363 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3364 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3365 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3366 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3367 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3368 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3369 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3370 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3371 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3372 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3373 {"x86-64", PROCESSOR_K8, CPU_K8,
3374 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3375 {"k8", PROCESSOR_K8, CPU_K8,
3376 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3377 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3378 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3379 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3380 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3381 {"opteron", PROCESSOR_K8, CPU_K8,
3382 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3383 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3384 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3385 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3386 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3387 {"athlon64", PROCESSOR_K8, CPU_K8,
3388 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3389 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3390 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3391 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3392 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3393 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3394 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3395 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3396 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3397 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3398 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3399 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3400 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3401 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3402 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3403 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3404 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3405 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3406 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3407 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3408 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3409 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3410 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3411 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3412 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3413 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3414 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3415 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3416 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3417 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3418 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3419 | PTA_XSAVEOPT | PTA_FSGSBASE},
3420 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3421 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3422 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3423 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3424 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3425 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3426 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3427 | PTA_MOVBE | PTA_MWAITX},
3428 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3429 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3430 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3431 | PTA_FXSR | PTA_XSAVE},
3432 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3433 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3434 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3435 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3436 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3437 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3439 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3440 PTA_64BIT
3441 | PTA_HLE /* flags are only used for the -march switch.  */ },
3444 /* -mrecip options. */
3445 static struct
3447 const char *string; /* option name */
3448 unsigned int mask; /* mask bits to set */
3450 const recip_options[] =
3452 { "all", RECIP_MASK_ALL },
3453 { "none", RECIP_MASK_NONE },
3454 { "div", RECIP_MASK_DIV },
3455 { "sqrt", RECIP_MASK_SQRT },
3456 { "vec-div", RECIP_MASK_VEC_DIV },
3457 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3460 int const pta_size = ARRAY_SIZE (processor_alias_table);
3462 /* Set up prefix/suffix so the error messages refer to either the command
3463 line argument, or the attribute(target). */
3464 if (main_args_p)
3466 prefix = "-m";
3467 suffix = "";
3468 sw = "switch";
3470 else
3472 prefix = "option(\"";
3473 suffix = "\")";
3474 sw = "attribute";
3477 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3478 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3479 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3480 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3481 #ifdef TARGET_BI_ARCH
3482 else
3484 #if TARGET_BI_ARCH == 1
3485 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3486 is on and OPTION_MASK_ABI_X32 is off. We turn off
3487 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3488 -mx32. */
3489 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3490 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3491 #else
3492 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3493 on and OPTION_MASK_ABI_64 is off. We turn off
3494 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3495 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3496 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3497 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3498 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3499 #endif
3500 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3501 && TARGET_IAMCU_P (opts->x_target_flags))
3502 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3503 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3505 #endif
3507 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3509 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3510 OPTION_MASK_ABI_64 for TARGET_X32. */
3511 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3512 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3514 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3515 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3516 | OPTION_MASK_ABI_X32
3517 | OPTION_MASK_ABI_64);
3518 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3520 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3521 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3522 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3523 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3526 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3527 SUBTARGET_OVERRIDE_OPTIONS;
3528 #endif
3530 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3531 SUBSUBTARGET_OVERRIDE_OPTIONS;
3532 #endif
3534 /* -fPIC is the default for x86_64. */
3535 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3536 opts->x_flag_pic = 2;
3538 /* Need to check -mtune=generic first. */
3539 if (opts->x_ix86_tune_string)
3541 /* As special support for cross compilers we read -mtune=native
3542 as -mtune=generic. With native compilers we won't see the
3543 -mtune=native, as it was changed by the driver. */
3544 if (!strcmp (opts->x_ix86_tune_string, "native"))
3546 opts->x_ix86_tune_string = "generic";
3548 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3549 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3550 "%stune=k8%s or %stune=generic%s instead as appropriate",
3551 prefix, suffix, prefix, suffix, prefix, suffix);
3553 else
3555 if (opts->x_ix86_arch_string)
3556 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3557 if (!opts->x_ix86_tune_string)
3559 opts->x_ix86_tune_string
3560 = processor_target_table[TARGET_CPU_DEFAULT].name;
3561 ix86_tune_defaulted = 1;
3564 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3565 or defaulted. We need to use a sensible tune option. */
3566 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3568 opts->x_ix86_tune_string = "generic";
3572 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3573 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3575 /* rep; movq isn't available in 32-bit code. */
3576 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3577 opts->x_ix86_stringop_alg = no_stringop;
3580 if (!opts->x_ix86_arch_string)
3581 opts->x_ix86_arch_string
3582 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3583 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3584 else
3585 ix86_arch_specified = 1;
3587 if (opts_set->x_ix86_pmode)
3589 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3590 && opts->x_ix86_pmode == PMODE_SI)
3591 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3592 && opts->x_ix86_pmode == PMODE_DI))
3593 error ("address mode %qs not supported in the %s bit mode",
3594 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3595 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3597 else
3598 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3599 ? PMODE_DI : PMODE_SI;
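/* Note (editorial): only true LP64 defaults to PMODE_DI here; x32 runs in
   64-bit mode but uses 32-bit pointers, so it defaults to PMODE_SI.  */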
3601 if (!opts_set->x_ix86_abi)
3602 opts->x_ix86_abi = DEFAULT_ABI;
3604 /* For targets using the MS ABI enable ms-extensions, if not
3605 explicitly turned off. For non-MS ABI targets we turn this
3606 option off. */
3607 if (!opts_set->x_flag_ms_extensions)
3608 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3610 if (opts_set->x_ix86_cmodel)
3612 switch (opts->x_ix86_cmodel)
3614 case CM_SMALL:
3615 case CM_SMALL_PIC:
3616 if (opts->x_flag_pic)
3617 opts->x_ix86_cmodel = CM_SMALL_PIC;
3618 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3619 error ("code model %qs not supported in the %s bit mode",
3620 "small", "32");
3621 break;
3623 case CM_MEDIUM:
3624 case CM_MEDIUM_PIC:
3625 if (opts->x_flag_pic)
3626 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3627 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3628 error ("code model %qs not supported in the %s bit mode",
3629 "medium", "32");
3630 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3631 error ("code model %qs not supported in x32 mode",
3632 "medium");
3633 break;
3635 case CM_LARGE:
3636 case CM_LARGE_PIC:
3637 if (opts->x_flag_pic)
3638 opts->x_ix86_cmodel = CM_LARGE_PIC;
3639 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3640 error ("code model %qs not supported in the %s bit mode",
3641 "large", "32");
3642 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3643 error ("code model %qs not supported in x32 mode",
3644 "large");
3645 break;
3647 case CM_32:
3648 if (opts->x_flag_pic)
3649 error ("code model %s does not support PIC mode", "32");
3650 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3651 error ("code model %qs not supported in the %s bit mode",
3652 "32", "64");
3653 break;
3655 case CM_KERNEL:
3656 if (opts->x_flag_pic)
3658 error ("code model %s does not support PIC mode", "kernel");
3659 opts->x_ix86_cmodel = CM_32;
3661 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3662 error ("code model %qs not supported in the %s bit mode",
3663 "kernel", "32");
3664 break;
3666 default:
3667 gcc_unreachable ();
3670 else
3672 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3673 use of rip-relative addressing. This eliminates fixups that
3674 would otherwise be needed if this object is to be placed in a
3675 DLL, and is essentially just as efficient as direct addressing. */
3676 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3677 && (TARGET_RDOS || TARGET_PECOFF))
3678 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3679 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3680 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3681 else
3682 opts->x_ix86_cmodel = CM_32;
3684 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3686 error ("-masm=intel not supported in this configuration");
3687 opts->x_ix86_asm_dialect = ASM_ATT;
3689 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3690 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3691 sorry ("%i-bit mode not compiled in",
3692 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3694 for (i = 0; i < pta_size; i++)
3695 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3697 ix86_schedule = processor_alias_table[i].schedule;
3698 ix86_arch = processor_alias_table[i].processor;
3699 /* Default cpu tuning to the architecture. */
3700 ix86_tune = ix86_arch;
3702 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3703 && !(processor_alias_table[i].flags & PTA_64BIT))
3704 error ("CPU you selected does not support x86-64 "
3705 "instruction set");
3707 if (processor_alias_table[i].flags & PTA_MMX
3708 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3710 if (processor_alias_table[i].flags & PTA_3DNOW
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3713 if (processor_alias_table[i].flags & PTA_3DNOW_A
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3716 if (processor_alias_table[i].flags & PTA_SSE
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3719 if (processor_alias_table[i].flags & PTA_SSE2
3720 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3721 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3722 if (processor_alias_table[i].flags & PTA_SSE3
3723 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3724 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3725 if (processor_alias_table[i].flags & PTA_SSSE3
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3728 if (processor_alias_table[i].flags & PTA_SSE4_1
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3731 if (processor_alias_table[i].flags & PTA_SSE4_2
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3734 if (processor_alias_table[i].flags & PTA_AVX
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3737 if (processor_alias_table[i].flags & PTA_AVX2
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3740 if (processor_alias_table[i].flags & PTA_FMA
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3743 if (processor_alias_table[i].flags & PTA_SSE4A
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3746 if (processor_alias_table[i].flags & PTA_FMA4
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3749 if (processor_alias_table[i].flags & PTA_XOP
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3752 if (processor_alias_table[i].flags & PTA_LWP
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3755 if (processor_alias_table[i].flags & PTA_ABM
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3758 if (processor_alias_table[i].flags & PTA_BMI
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3761 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3762 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3763 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3764 if (processor_alias_table[i].flags & PTA_TBM
3765 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3766 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3767 if (processor_alias_table[i].flags & PTA_BMI2
3768 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3769 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3770 if (processor_alias_table[i].flags & PTA_CX16
3771 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3772 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3773 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3774 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3775 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3776 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3777 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3778 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3779 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3780 if (processor_alias_table[i].flags & PTA_MOVBE
3781 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3783 if (processor_alias_table[i].flags & PTA_AES
3784 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3785 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3786 if (processor_alias_table[i].flags & PTA_SHA
3787 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3788 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3789 if (processor_alias_table[i].flags & PTA_PCLMUL
3790 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3791 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3792 if (processor_alias_table[i].flags & PTA_FSGSBASE
3793 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3794 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3795 if (processor_alias_table[i].flags & PTA_RDRND
3796 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3797 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3798 if (processor_alias_table[i].flags & PTA_F16C
3799 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3800 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3801 if (processor_alias_table[i].flags & PTA_RTM
3802 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3803 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3804 if (processor_alias_table[i].flags & PTA_HLE
3805 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3806 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3807 if (processor_alias_table[i].flags & PTA_PRFCHW
3808 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3809 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3810 if (processor_alias_table[i].flags & PTA_RDSEED
3811 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3812 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3813 if (processor_alias_table[i].flags & PTA_ADX
3814 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3815 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3816 if (processor_alias_table[i].flags & PTA_FXSR
3817 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3818 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3819 if (processor_alias_table[i].flags & PTA_XSAVE
3820 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3822 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3823 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3824 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3825 if (processor_alias_table[i].flags & PTA_AVX512F
3826 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3827 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3828 if (processor_alias_table[i].flags & PTA_AVX512ER
3829 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3830 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3831 if (processor_alias_table[i].flags & PTA_AVX512PF
3832 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3833 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3834 if (processor_alias_table[i].flags & PTA_AVX512CD
3835 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3836 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3837 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3838 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3839 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3840 if (processor_alias_table[i].flags & PTA_PCOMMIT
3841 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3842 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3843 if (processor_alias_table[i].flags & PTA_CLWB
3844 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3845 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3846 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3847 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3848 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3849 if (processor_alias_table[i].flags & PTA_XSAVEC
3850 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3851 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3852 if (processor_alias_table[i].flags & PTA_XSAVES
3853 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3854 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3855 if (processor_alias_table[i].flags & PTA_AVX512DQ
3856 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3857 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3858 if (processor_alias_table[i].flags & PTA_AVX512BW
3859 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3860 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3861 if (processor_alias_table[i].flags & PTA_AVX512VL
3862 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3863 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3864 if (processor_alias_table[i].flags & PTA_MPX
3865 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3866 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3867 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3868 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3869 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3870 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3871 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3872 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3873 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3874 x86_prefetch_sse = true;
3875 if (processor_alias_table[i].flags & PTA_MWAITX
3876 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3877 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3879 break;
3882 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3883 error ("Intel MPX does not support x32");
3888 if (TARGET_IAMCU_P (opts->x_target_flags))
3890 /* Verify that x87/MMX/SSE/AVX is off for -miamcu. */
3891 if (TARGET_80387_P (opts->x_target_flags))
3892 sorry ("X87 FPU isn%'t supported in Intel MCU psABI");
3893 else if ((opts->x_ix86_isa_flags & (OPTION_MASK_ISA_MMX
3894 | OPTION_MASK_ISA_SSE
3895 | OPTION_MASK_ISA_AVX)))
3896 sorry ("%s isn%'t supported in Intel MCU psABI",
3897 TARGET_MMX_P (opts->x_ix86_isa_flags)
3898 ? "MMX"
3899 : TARGET_SSE_P (opts->x_ix86_isa_flags) ? "SSE" : "AVX");
3902 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3903 error ("generic CPU can be used only for %stune=%s %s",
3904 prefix, suffix, sw);
3905 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3906 error ("intel CPU can be used only for %stune=%s %s",
3907 prefix, suffix, sw);
3908 else if (i == pta_size)
3909 error ("bad value (%s) for %sarch=%s %s",
3910 opts->x_ix86_arch_string, prefix, suffix, sw);
3912 ix86_arch_mask = 1u << ix86_arch;
3913 for (i = 0; i < X86_ARCH_LAST; ++i)
3914 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3916 for (i = 0; i < pta_size; i++)
3917 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3919 ix86_schedule = processor_alias_table[i].schedule;
3920 ix86_tune = processor_alias_table[i].processor;
3921 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3923 if (!(processor_alias_table[i].flags & PTA_64BIT))
3925 if (ix86_tune_defaulted)
3927 opts->x_ix86_tune_string = "x86-64";
3928 for (i = 0; i < pta_size; i++)
3929 if (! strcmp (opts->x_ix86_tune_string,
3930 processor_alias_table[i].name))
3931 break;
3932 ix86_schedule = processor_alias_table[i].schedule;
3933 ix86_tune = processor_alias_table[i].processor;
3935 else
3936 error ("CPU you selected does not support x86-64 "
3937 "instruction set");
3940 /* Intel CPUs have always interpreted SSE prefetch instructions as
3941 NOPs; so, we can enable SSE prefetch instructions even when
3942 -mtune (rather than -march) points us to a processor that has them.
3943 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3944 higher processors. */
3945 if (TARGET_CMOV
3946 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3947 x86_prefetch_sse = true;
3948 break;
3951 if (ix86_tune_specified && i == pta_size)
3952 error ("bad value (%s) for %stune=%s %s",
3953 opts->x_ix86_tune_string, prefix, suffix, sw);
3955 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3957 #ifndef USE_IX86_FRAME_POINTER
3958 #define USE_IX86_FRAME_POINTER 0
3959 #endif
3961 #ifndef USE_X86_64_FRAME_POINTER
3962 #define USE_X86_64_FRAME_POINTER 0
3963 #endif
3965 /* Set the default values for switches whose default depends on TARGET_64BIT
3966 in case they weren't overwritten by command line options. */
3967 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3969 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3970 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3971 if (opts->x_flag_asynchronous_unwind_tables
3972 && !opts_set->x_flag_unwind_tables
3973 && TARGET_64BIT_MS_ABI)
3974 opts->x_flag_unwind_tables = 1;
3975 if (opts->x_flag_asynchronous_unwind_tables == 2)
3976 opts->x_flag_unwind_tables
3977 = opts->x_flag_asynchronous_unwind_tables = 1;
3978 if (opts->x_flag_pcc_struct_return == 2)
3979 opts->x_flag_pcc_struct_return = 0;
3981 else
3983 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3984 opts->x_flag_omit_frame_pointer
3985 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3986 if (opts->x_flag_asynchronous_unwind_tables == 2)
3987 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3988 if (opts->x_flag_pcc_struct_return == 2)
3990 /* Intel MCU psABI specifies that -freg-struct-return should
3991 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
3992 we check -miamcu so that -freg-struct-return is always
3993 turned on if -miamcu is used. */
3994 if (TARGET_IAMCU_P (opts->x_target_flags))
3995 opts->x_flag_pcc_struct_return = 0;
3996 else
3997 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4001 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4002 /* TODO: ix86_cost should be chosen at instruction or function granularity,
4003 so that for cold code we use size_cost even in !optimize_size compilation. */
4004 if (opts->x_optimize_size)
4005 ix86_cost = &ix86_size_cost;
4006 else
4007 ix86_cost = ix86_tune_cost;
4009 /* Arrange to set up i386_stack_locals for all functions. */
4010 init_machine_status = ix86_init_machine_status;
4012 /* Validate -mregparm= value. */
4013 if (opts_set->x_ix86_regparm)
4015 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4016 warning (0, "-mregparm is ignored in 64-bit mode");
4017 else if (TARGET_IAMCU_P (opts->x_target_flags))
4018 warning (0, "-mregparm is ignored for Intel MCU psABI");
4019 if (opts->x_ix86_regparm > REGPARM_MAX)
4021 error ("-mregparm=%d is not between 0 and %d",
4022 opts->x_ix86_regparm, REGPARM_MAX);
4023 opts->x_ix86_regparm = 0;
4026 if (TARGET_IAMCU_P (opts->x_target_flags)
4027 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4028 opts->x_ix86_regparm = REGPARM_MAX;
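/* Usage note (editorial): e.g. -mregparm=3 requests up to three integer
   arguments in registers on 32-bit targets; values above REGPARM_MAX are
   rejected, and the option is ignored where the ABI already fixes the
   convention (64-bit and Intel MCU).  */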
4030 /* Default align_* from the processor table. */
4031 ix86_default_align (opts);
4033 /* Provide default for -mbranch-cost= value. */
4034 if (!opts_set->x_ix86_branch_cost)
4035 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4037 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4039 opts->x_target_flags
4040 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4042 /* Enable by default the SSE and MMX builtins. Do allow the user to
4043 explicitly disable any of these. In particular, disabling SSE and
4044 MMX for kernel code is extremely useful. */
4045 if (!ix86_arch_specified)
4046 opts->x_ix86_isa_flags
4047 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4048 | TARGET_SUBTARGET64_ISA_DEFAULT)
4049 & ~opts->x_ix86_isa_flags_explicit);
4051 if (TARGET_RTD_P (opts->x_target_flags))
4052 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
4054 else
4056 opts->x_target_flags
4057 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4059 if (!ix86_arch_specified)
4060 opts->x_ix86_isa_flags
4061 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4063 /* The i386 ABI does not specify a red zone. It still makes sense to use
4064 one when the programmer takes care to keep the stack from being destroyed. */
4065 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4066 opts->x_target_flags |= MASK_NO_RED_ZONE;
4069 /* Keep nonleaf frame pointers. */
4070 if (opts->x_flag_omit_frame_pointer)
4071 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4072 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4073 opts->x_flag_omit_frame_pointer = 1;
4075 /* If we're doing fast math, we don't care about comparison order
4076 wrt NaNs. This lets us use a shorter comparison sequence. */
4077 if (opts->x_flag_finite_math_only)
4078 opts->x_target_flags &= ~MASK_IEEE_FP;
4080 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4081 since the insns won't need emulation. */
4082 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4083 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4085 /* Likewise, if the target doesn't have a 387, or we've specified
4086 software floating point, don't use 387 inline intrinsics. */
4087 if (!TARGET_80387_P (opts->x_target_flags))
4088 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4090 /* Turn on MMX builtins for -msse. */
4091 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4092 opts->x_ix86_isa_flags
4093 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4095 /* Enable SSE prefetch. */
4096 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4097 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4098 x86_prefetch_sse = true;
4100 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4101 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4102 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4103 opts->x_ix86_isa_flags
4104 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4106 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4107 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4108 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4109 opts->x_ix86_isa_flags
4110 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4112 /* Enable lzcnt instruction for -mabm. */
4113 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4114 opts->x_ix86_isa_flags
4115 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4117 /* Validate -mpreferred-stack-boundary= value or default it to
4118 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4119 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4120 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4122 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4123 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4124 int max = (TARGET_SEH ? 4 : 12);
4126 if (opts->x_ix86_preferred_stack_boundary_arg < min
4127 || opts->x_ix86_preferred_stack_boundary_arg > max)
4129 if (min == max)
4130 error ("-mpreferred-stack-boundary is not supported "
4131 "for this target");
4132 else
4133 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4134 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4136 else
4137 ix86_preferred_stack_boundary
4138 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4141 /* Set the default value for -mstackrealign. */
4142 if (opts->x_ix86_force_align_arg_pointer == -1)
4143 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4145 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4147 /* Validate -mincoming-stack-boundary= value or default it to
4148 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4149 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4150 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4152 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4153 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4155 if (opts->x_ix86_incoming_stack_boundary_arg < min
4156 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4157 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4158 opts->x_ix86_incoming_stack_boundary_arg, min);
4159 else
4161 ix86_user_incoming_stack_boundary
4162 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4163 ix86_incoming_stack_boundary
4164 = ix86_user_incoming_stack_boundary;
4168 #ifndef NO_PROFILE_COUNTERS
4169 if (flag_nop_mcount)
4170 error ("-mnop-mcount is not compatible with this target");
4171 #endif
4172 if (flag_nop_mcount && flag_pic)
4173 error ("-mnop-mcount is not implemented for -fPIC");
4175 /* Accept -msseregparm only if at least SSE support is enabled. */
4176 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4177 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4178 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4180 if (opts_set->x_ix86_fpmath)
4182 if (opts->x_ix86_fpmath & FPMATH_SSE)
4184 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4186 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4187 opts->x_ix86_fpmath = FPMATH_387;
4189 else if ((opts->x_ix86_fpmath & FPMATH_387)
4190 && !TARGET_80387_P (opts->x_target_flags))
4192 warning (0, "387 instruction set disabled, using SSE arithmetics");
4193 opts->x_ix86_fpmath = FPMATH_SSE;
4197 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4198 fpmath=387. The latter is nevertheless the default on many targets,
4199 since the extra 80-bit precision of temporaries is considered part of
4200 the ABI. Overwrite the default at least for -ffast-math.
4201 TODO: -mfpmath=both seems to produce equally performing code with
4202 slightly smaller binaries. It is however not clear whether register
4203 allocation is ready for this setting.
4204 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4205 codegen. We may switch to 387 with -ffast-math for size-optimized
4206 functions. */
4207 else if (fast_math_flags_set_p (&global_options)
4208 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4209 opts->x_ix86_fpmath = FPMATH_SSE;
4210 else
4211 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4213 /* If the i387 is disabled, then do not return values in it. */
4214 if (!TARGET_80387_P (opts->x_target_flags))
4215 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4217 /* Use external vectorized library in vectorizing intrinsics. */
4218 if (opts_set->x_ix86_veclibabi_type)
4219 switch (opts->x_ix86_veclibabi_type)
4221 case ix86_veclibabi_type_svml:
4222 ix86_veclib_handler = ix86_veclibabi_svml;
4223 break;
4225 case ix86_veclibabi_type_acml:
4226 ix86_veclib_handler = ix86_veclibabi_acml;
4227 break;
4229 default:
4230 gcc_unreachable ();
4233 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4234 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4235 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4237 /* If stack probes are required, the space used for large function
4238 arguments on the stack must also be probed, so enable
4239 -maccumulate-outgoing-args so this happens in the prologue. */
4240 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4241 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4243 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4244 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4245 "for correctness", prefix, suffix);
4246 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4249 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4251 char *p;
4252 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4253 p = strchr (internal_label_prefix, 'X');
4254 internal_label_prefix_len = p - internal_label_prefix;
4255 *p = '\0';
4258 /* When no scheduling description is available, disable the scheduler
4259 passes so they won't slow down compilation and make x87 code slower. */
4260 if (!TARGET_SCHEDULE)
4261 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4263 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4264 ix86_tune_cost->simultaneous_prefetches,
4265 opts->x_param_values,
4266 opts_set->x_param_values);
4267 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4268 ix86_tune_cost->prefetch_block,
4269 opts->x_param_values,
4270 opts_set->x_param_values);
4271 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4272 ix86_tune_cost->l1_cache_size,
4273 opts->x_param_values,
4274 opts_set->x_param_values);
4275 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4276 ix86_tune_cost->l2_cache_size,
4277 opts->x_param_values,
4278 opts_set->x_param_values);
4280 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4281 if (opts->x_flag_prefetch_loop_arrays < 0
4282 && HAVE_prefetch
4283 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4284 && !opts->x_optimize_size
4285 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4286 opts->x_flag_prefetch_loop_arrays = 1;
4288 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4289 can be optimized to ap = __builtin_next_arg (0). */
4290 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4291 targetm.expand_builtin_va_start = NULL;
4293 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4295 ix86_gen_leave = gen_leave_rex64;
4296 if (Pmode == DImode)
4298 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4299 ix86_gen_tls_local_dynamic_base_64
4300 = gen_tls_local_dynamic_base_64_di;
4302 else
4304 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4305 ix86_gen_tls_local_dynamic_base_64
4306 = gen_tls_local_dynamic_base_64_si;
4309 else
4310 ix86_gen_leave = gen_leave;
4312 if (Pmode == DImode)
4314 ix86_gen_add3 = gen_adddi3;
4315 ix86_gen_sub3 = gen_subdi3;
4316 ix86_gen_sub3_carry = gen_subdi3_carry;
4317 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4318 ix86_gen_andsp = gen_anddi3;
4319 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4320 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4321 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4322 ix86_gen_monitor = gen_sse3_monitor_di;
4323 ix86_gen_monitorx = gen_monitorx_di;
4325 else
4327 ix86_gen_add3 = gen_addsi3;
4328 ix86_gen_sub3 = gen_subsi3;
4329 ix86_gen_sub3_carry = gen_subsi3_carry;
4330 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4331 ix86_gen_andsp = gen_andsi3;
4332 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4333 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4334 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4335 ix86_gen_monitor = gen_sse3_monitor_si;
4336 ix86_gen_monitorx = gen_monitorx_si;
4339 #ifdef USE_IX86_CLD
4340 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4341 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4342 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4343 #endif
4345 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4347 if (opts->x_flag_fentry > 0)
4348 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4349 "with -fpic");
4350 opts->x_flag_fentry = 0;
4352 else if (TARGET_SEH)
4354 if (opts->x_flag_fentry == 0)
4355 sorry ("-mno-fentry isn%'t compatible with SEH");
4356 opts->x_flag_fentry = 1;
4358 else if (opts->x_flag_fentry < 0)
4360 #if defined(PROFILE_BEFORE_PROLOGUE)
4361 opts->x_flag_fentry = 1;
4362 #else
4363 opts->x_flag_fentry = 0;
4364 #endif
4367 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4368 opts->x_target_flags |= MASK_VZEROUPPER;
4369 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4370 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4371 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4372 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4373 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4374 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4375 /* Enable 128-bit AVX instruction generation
4376 for the auto-vectorizer. */
4377 if (TARGET_AVX128_OPTIMAL
4378 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4379 opts->x_target_flags |= MASK_PREFER_AVX128;
4381 if (opts->x_ix86_recip_name)
4383 char *p = ASTRDUP (opts->x_ix86_recip_name);
4384 char *q;
4385 unsigned int mask, i;
4386 bool invert;
4388 while ((q = strtok (p, ",")) != NULL)
4390 p = NULL;
4391 if (*q == '!')
4393 invert = true;
4394 q++;
4396 else
4397 invert = false;
4399 if (!strcmp (q, "default"))
4400 mask = RECIP_MASK_ALL;
4401 else
4403 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4404 if (!strcmp (q, recip_options[i].string))
4406 mask = recip_options[i].mask;
4407 break;
4410 if (i == ARRAY_SIZE (recip_options))
4412 error ("unknown option for -mrecip=%s", q);
4413 invert = false;
4414 mask = RECIP_MASK_NONE;
4418 opts->x_recip_mask_explicit |= mask;
4419 if (invert)
4420 opts->x_recip_mask &= ~mask;
4421 else
4422 opts->x_recip_mask |= mask;
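/* Usage sketch (editorial): -mrecip=all,!sqrt first ORs in RECIP_MASK_ALL,
   then the '!'-prefixed token clears RECIP_MASK_SQRT, enabling reciprocal
   approximations for everything except sqrt.  */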
4426 if (TARGET_RECIP_P (opts->x_target_flags))
4427 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4428 else if (opts_set->x_target_flags & MASK_RECIP)
4429 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4431 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4432 for 64-bit Bionic. Also default long double to 64-bit for Intel
4433 MCU psABI. */
4434 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4435 && !(opts_set->x_target_flags
4436 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4437 opts->x_target_flags |= (TARGET_64BIT
4438 ? MASK_LONG_DOUBLE_128
4439 : MASK_LONG_DOUBLE_64);
4441 /* Only one of them can be active. */
4442 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4443 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4445 /* Save the initial options in case the user does function specific
4446 options. */
4447 if (main_args_p)
4448 target_option_default_node = target_option_current_node
4449 = build_target_option_node (opts);
4451 /* Handle stack protector */
4452 if (!opts_set->x_ix86_stack_protector_guard)
4453 opts->x_ix86_stack_protector_guard
4454 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4456 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4457 if (opts->x_ix86_tune_memcpy_strategy)
4459 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4460 ix86_parse_stringop_strategy_string (str, false);
4461 free (str);
4464 if (opts->x_ix86_tune_memset_strategy)
4466 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4467 ix86_parse_stringop_strategy_string (str, true);
4468 free (str);
4472 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4474 static void
4475 ix86_option_override (void)
4477 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4478 struct register_pass_info insert_vzeroupper_info
4479 = { pass_insert_vzeroupper, "reload",
4480 1, PASS_POS_INSERT_AFTER
4483 ix86_option_override_internal (true, &global_options, &global_options_set);
4486 /* This needs to be done at startup. It's convenient to do it here. */
4487 register_pass (&insert_vzeroupper_info);
4490 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4491 static char *
4492 ix86_offload_options (void)
4494 if (TARGET_LP64)
4495 return xstrdup ("-foffload-abi=lp64");
4496 return xstrdup ("-foffload-abi=ilp32");
4499 /* Update register usage after having seen the compiler flags. */
4501 static void
4502 ix86_conditional_register_usage (void)
4504 int i, c_mask;
4506 /* For 32-bit targets, squash the REX registers. */
4507 if (! TARGET_64BIT)
4509 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4510 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4511 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4512 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4513 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4514 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4517 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4518 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4519 : TARGET_64BIT ? (1 << 2)
4520 : (1 << 1));
4522 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4524 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4526 /* Set/reset conditionally defined registers from
4527 CALL_USED_REGISTERS initializer. */
4528 if (call_used_regs[i] > 1)
4529 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4531 /* Compute the CLOBBERED_REGS register set as the call-used
4532 registers from the GENERAL_REGS register set. */
4533 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4534 && call_used_regs[i])
4535 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4538 /* If MMX is disabled, squash the registers. */
4539 if (! TARGET_MMX)
4540 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4541 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4542 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4544 /* If SSE is disabled, squash the registers. */
4545 if (! TARGET_SSE)
4546 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4547 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4548 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4550 /* If the FPU is disabled, squash the registers. */
4551 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4552 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4553 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4554 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4556 /* If AVX512F is disabled, squash the registers. */
4557 if (! TARGET_AVX512F)
4559 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4560 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4562 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4563 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4566 /* If MPX is disabled, squash the registers. */
4567 if (! TARGET_MPX)
4568 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4569 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4573 /* Save the current options */
4575 static void
4576 ix86_function_specific_save (struct cl_target_option *ptr,
4577 struct gcc_options *opts)
4579 ptr->arch = ix86_arch;
4580 ptr->schedule = ix86_schedule;
4581 ptr->prefetch_sse = x86_prefetch_sse;
4582 ptr->tune = ix86_tune;
4583 ptr->branch_cost = ix86_branch_cost;
4584 ptr->tune_defaulted = ix86_tune_defaulted;
4585 ptr->arch_specified = ix86_arch_specified;
4586 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4587 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4588 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4589 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4590 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4591 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4592 ptr->x_ix86_abi = opts->x_ix86_abi;
4593 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4594 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4595 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4596 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4597 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4598 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4599 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4600 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4601 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4602 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4603 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4604 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4605 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4606 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4607 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4608 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4609 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4610 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4611 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4612 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4614 /* The fields are char but the variables are not; make sure the
4615 values fit in the fields. */
4616 gcc_assert (ptr->arch == ix86_arch);
4617 gcc_assert (ptr->schedule == ix86_schedule);
4618 gcc_assert (ptr->tune == ix86_tune);
4619 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4622 /* Restore the current options */
4624 static void
4625 ix86_function_specific_restore (struct gcc_options *opts,
4626 struct cl_target_option *ptr)
4628 enum processor_type old_tune = ix86_tune;
4629 enum processor_type old_arch = ix86_arch;
4630 unsigned int ix86_arch_mask;
4631 int i;
4633 /* We don't change -fPIC. */
4634 opts->x_flag_pic = flag_pic;
4636 ix86_arch = (enum processor_type) ptr->arch;
4637 ix86_schedule = (enum attr_cpu) ptr->schedule;
4638 ix86_tune = (enum processor_type) ptr->tune;
4639 x86_prefetch_sse = ptr->prefetch_sse;
4640 opts->x_ix86_branch_cost = ptr->branch_cost;
4641 ix86_tune_defaulted = ptr->tune_defaulted;
4642 ix86_arch_specified = ptr->arch_specified;
4643 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4644 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4645 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4646 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4647 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4648 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4649 opts->x_ix86_abi = ptr->x_ix86_abi;
4650 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4651 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4652 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4653 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4654 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4655 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4656 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4657 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4658 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4659 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4660 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4661 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4662 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4663 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4664 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4665 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4666 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4667 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4668 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4669 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4670 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4671 /* TODO: ix86_cost should be chosen at instruction or function granularity
4672 so that for cold code we use size_cost even in !optimize_size compilation. */
4673 if (opts->x_optimize_size)
4674 ix86_cost = &ix86_size_cost;
4675 else
4676 ix86_cost = ix86_tune_cost;
4678 /* Recreate the arch feature tests if the arch changed */
4679 if (old_arch != ix86_arch)
4681 ix86_arch_mask = 1u << ix86_arch;
4682 for (i = 0; i < X86_ARCH_LAST; ++i)
4683 ix86_arch_features[i]
4684 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4687 /* Recreate the tune optimization tests */
4688 if (old_tune != ix86_tune)
4689 set_ix86_tune_features (ix86_tune, false);
4692 /* Adjust target options after streaming them in. This is mainly about
4693 reconciling them with global options. */
4695 static void
4696 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4698 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4699 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4700 for PIC, or error out. */
4701 if (flag_pic)
4702 switch (ptr->x_ix86_cmodel)
4704 case CM_SMALL:
4705 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4706 break;
4708 case CM_MEDIUM:
4709 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4710 break;
4712 case CM_LARGE:
4713 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4714 break;
4716 case CM_KERNEL:
4717 error ("code model %s does not support PIC mode", "kernel");
4718 break;
4720 default:
4721 break;
4723 else
4724 switch (ptr->x_ix86_cmodel)
4726 case CM_SMALL_PIC:
4727 ptr->x_ix86_cmodel = CM_SMALL;
4728 break;
4730 case CM_MEDIUM_PIC:
4731 ptr->x_ix86_cmodel = CM_MEDIUM;
4732 break;
4734 case CM_LARGE_PIC:
4735 ptr->x_ix86_cmodel = CM_LARGE;
4736 break;
4738 default:
4739 break;
4743 /* Print the current options */
4745 static void
4746 ix86_function_specific_print (FILE *file, int indent,
4747 struct cl_target_option *ptr)
4749 char *target_string
4750 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4751 NULL, NULL, ptr->x_ix86_fpmath, false);
4753 gcc_assert (ptr->arch < PROCESSOR_max);
4754 fprintf (file, "%*sarch = %d (%s)\n",
4755 indent, "",
4756 ptr->arch, processor_target_table[ptr->arch].name);
4758 gcc_assert (ptr->tune < PROCESSOR_max);
4759 fprintf (file, "%*stune = %d (%s)\n",
4760 indent, "",
4761 ptr->tune, processor_target_table[ptr->tune].name);
4763 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4765 if (target_string)
4767 fprintf (file, "%*s%s\n", indent, "", target_string);
4768 free (target_string);
4773 /* Inner function to process the attribute((target(...))): take an argument
4774 and set the current options from that argument. If we have a list,
4775 recursively go over the list. */
4777 static bool
4778 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4779 struct gcc_options *opts,
4780 struct gcc_options *opts_set,
4781 struct gcc_options *enum_opts_set)
4783 char *next_optstr;
4784 bool ret = true;
4786 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4787 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4788 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4789 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4790 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4792 enum ix86_opt_type
4794 ix86_opt_unknown,
4795 ix86_opt_yes,
4796 ix86_opt_no,
4797 ix86_opt_str,
4798 ix86_opt_enum,
4799 ix86_opt_isa
4802 static const struct
4804 const char *string;
4805 size_t len;
4806 enum ix86_opt_type type;
4807 int opt;
4808 int mask;
4809 } attrs[] = {
4810 /* isa options */
4811 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4812 IX86_ATTR_ISA ("abm", OPT_mabm),
4813 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4814 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4815 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4816 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4817 IX86_ATTR_ISA ("aes", OPT_maes),
4818 IX86_ATTR_ISA ("sha", OPT_msha),
4819 IX86_ATTR_ISA ("avx", OPT_mavx),
4820 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4821 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4822 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4823 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4824 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4825 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4826 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4827 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4828 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4829 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4830 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4831 IX86_ATTR_ISA ("sse", OPT_msse),
4832 IX86_ATTR_ISA ("sse2", OPT_msse2),
4833 IX86_ATTR_ISA ("sse3", OPT_msse3),
4834 IX86_ATTR_ISA ("sse4", OPT_msse4),
4835 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4836 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4837 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4838 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4839 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4840 IX86_ATTR_ISA ("fma", OPT_mfma),
4841 IX86_ATTR_ISA ("xop", OPT_mxop),
4842 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4843 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4844 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4845 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4846 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4847 IX86_ATTR_ISA ("hle", OPT_mhle),
4848 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4849 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4850 IX86_ATTR_ISA ("adx", OPT_madx),
4851 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4852 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4853 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4854 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4855 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4856 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4857 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4858 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4859 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4860 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4861 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4862 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
4864 /* enum options */
4865 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4867 /* string options */
4868 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4869 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4871 /* flag options */
4872 IX86_ATTR_YES ("cld",
4873 OPT_mcld,
4874 MASK_CLD),
4876 IX86_ATTR_NO ("fancy-math-387",
4877 OPT_mfancy_math_387,
4878 MASK_NO_FANCY_MATH_387),
4880 IX86_ATTR_YES ("ieee-fp",
4881 OPT_mieee_fp,
4882 MASK_IEEE_FP),
4884 IX86_ATTR_YES ("inline-all-stringops",
4885 OPT_minline_all_stringops,
4886 MASK_INLINE_ALL_STRINGOPS),
4888 IX86_ATTR_YES ("inline-stringops-dynamically",
4889 OPT_minline_stringops_dynamically,
4890 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4892 IX86_ATTR_NO ("align-stringops",
4893 OPT_mno_align_stringops,
4894 MASK_NO_ALIGN_STRINGOPS),
4896 IX86_ATTR_YES ("recip",
4897 OPT_mrecip,
4898 MASK_RECIP),
4902 /* If this is a list, recurse to get the options. */
4903 if (TREE_CODE (args) == TREE_LIST)
4905 bool ret = true;
4907 for (; args; args = TREE_CHAIN (args))
4908 if (TREE_VALUE (args)
4909 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4910 p_strings, opts, opts_set,
4911 enum_opts_set))
4912 ret = false;
4914 return ret;
4917 else if (TREE_CODE (args) != STRING_CST)
4919 error ("attribute %<target%> argument not a string");
4920 return false;
4923 /* Handle multiple arguments separated by commas. */
4924 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4926 while (next_optstr && *next_optstr != '\0')
4928 char *p = next_optstr;
4929 char *orig_p = p;
4930 char *comma = strchr (next_optstr, ',');
4931 const char *opt_string;
4932 size_t len, opt_len;
4933 int opt;
4934 bool opt_set_p;
4935 char ch;
4936 unsigned i;
4937 enum ix86_opt_type type = ix86_opt_unknown;
4938 int mask = 0;
4940 if (comma)
4942 *comma = '\0';
4943 len = comma - next_optstr;
4944 next_optstr = comma + 1;
4946 else
4948 len = strlen (p);
4949 next_optstr = NULL;
4952 /* Recognize no-xxx. */
4953 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4955 opt_set_p = false;
4956 p += 3;
4957 len -= 3;
4959 else
4960 opt_set_p = true;
4962 /* Find the option. */
4963 ch = *p;
4964 opt = N_OPTS;
4965 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4967 type = attrs[i].type;
4968 opt_len = attrs[i].len;
4969 if (ch == attrs[i].string[0]
4970 && ((type != ix86_opt_str && type != ix86_opt_enum)
4971 ? len == opt_len
4972 : len > opt_len)
4973 && memcmp (p, attrs[i].string, opt_len) == 0)
4975 opt = attrs[i].opt;
4976 mask = attrs[i].mask;
4977 opt_string = attrs[i].string;
4978 break;
4982 /* Process the option. */
4983 if (opt == N_OPTS)
4985 error ("attribute(target(\"%s\")) is unknown", orig_p);
4986 ret = false;
4989 else if (type == ix86_opt_isa)
4991 struct cl_decoded_option decoded;
4993 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4994 ix86_handle_option (opts, opts_set,
4995 &decoded, input_location);
4998 else if (type == ix86_opt_yes || type == ix86_opt_no)
5000 if (type == ix86_opt_no)
5001 opt_set_p = !opt_set_p;
5003 if (opt_set_p)
5004 opts->x_target_flags |= mask;
5005 else
5006 opts->x_target_flags &= ~mask;
5009 else if (type == ix86_opt_str)
5011 if (p_strings[opt])
5013 error ("option(\"%s\") was already specified", opt_string);
5014 ret = false;
5016 else
5017 p_strings[opt] = xstrdup (p + opt_len);
5020 else if (type == ix86_opt_enum)
5022 bool arg_ok;
5023 int value;
5025 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5026 if (arg_ok)
5027 set_option (opts, enum_opts_set, opt, value,
5028 p + opt_len, DK_UNSPECIFIED, input_location,
5029 global_dc);
5030 else
5032 error ("attribute(target(\"%s\")) is unknown", orig_p);
5033 ret = false;
5037 else
5038 gcc_unreachable ();
5041 return ret;
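/* For reference, the strings parsed above come from user code such as the
   following (standard GNU C target-attribute syntax; the function names
   are purely illustrative):

       __attribute__((target ("avx2,popcnt"))) int fast_path (void);
       __attribute__((target ("no-sse4a"))) int baseline (void);

   Each comma-separated item is looked up in the attrs[] table, with a
   leading "no-" selecting the negative form of the option.  */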
5044 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5046 tree
5047 ix86_valid_target_attribute_tree (tree args,
5048 struct gcc_options *opts,
5049 struct gcc_options *opts_set)
5051 const char *orig_arch_string = opts->x_ix86_arch_string;
5052 const char *orig_tune_string = opts->x_ix86_tune_string;
5053 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5054 int orig_tune_defaulted = ix86_tune_defaulted;
5055 int orig_arch_specified = ix86_arch_specified;
5056 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5057 tree t = NULL_TREE;
5058 int i;
5059 struct cl_target_option *def
5060 = TREE_TARGET_OPTION (target_option_default_node);
5061 struct gcc_options enum_opts_set;
5063 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5065 /* Process each of the options on the chain. */
5066 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5067 opts_set, &enum_opts_set))
5068 return error_mark_node;
5070 /* If the changed options are different from the default, rerun
5071 ix86_option_override_internal, and then save the options away.
5072 The string options are attribute options, and will be undone
5073 when we copy the save structure. */
5074 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5075 || opts->x_target_flags != def->x_target_flags
5076 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5077 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5078 || enum_opts_set.x_ix86_fpmath)
5080 /* If we are using the default tune= or arch=, undo the string assigned,
5081 and use the default. */
5082 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5083 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
5084 else if (!orig_arch_specified)
5085 opts->x_ix86_arch_string = NULL;
5087 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5088 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
5089 else if (orig_tune_defaulted)
5090 opts->x_ix86_tune_string = NULL;
5092 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5093 if (enum_opts_set.x_ix86_fpmath)
5094 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5095 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5096 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5098 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5099 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5102 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5103 ix86_option_override_internal (false, opts, opts_set);
5105 /* Add any builtin functions with the new isa if any. */
5106 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5108 /* Save the current options unless we are validating options for
5109 #pragma. */
5110 t = build_target_option_node (opts);
5112 opts->x_ix86_arch_string = orig_arch_string;
5113 opts->x_ix86_tune_string = orig_tune_string;
5114 opts_set->x_ix86_fpmath = orig_fpmath_set;
5116 /* Free up memory allocated to hold the strings */
5117 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5118 free (option_strings[i]);
5121 return t;
5124 /* Hook to validate attribute((target("string"))). */
5126 static bool
5127 ix86_valid_target_attribute_p (tree fndecl,
5128 tree ARG_UNUSED (name),
5129 tree args,
5130 int ARG_UNUSED (flags))
5132 struct gcc_options func_options;
5133 tree new_target, new_optimize;
5134 bool ret = true;
5136 /* attribute((target("default"))) does nothing, beyond
5137 affecting multi-versioning. */
5138 if (TREE_VALUE (args)
5139 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5140 && TREE_CHAIN (args) == NULL_TREE
5141 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5142 return true;
5144 tree old_optimize = build_optimization_node (&global_options);
5146 /* Get the optimization options of the current function. */
5147 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5149 if (!func_optimize)
5150 func_optimize = old_optimize;
5152 /* Init func_options. */
5153 memset (&func_options, 0, sizeof (func_options));
5154 init_options_struct (&func_options, NULL);
5155 lang_hooks.init_options_struct (&func_options);
5157 cl_optimization_restore (&func_options,
5158 TREE_OPTIMIZATION (func_optimize));
5160 /* Initialize func_options to the default before its target options can
5161 be set. */
5162 cl_target_option_restore (&func_options,
5163 TREE_TARGET_OPTION (target_option_default_node));
5165 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5166 &global_options_set);
5168 new_optimize = build_optimization_node (&func_options);
5170 if (new_target == error_mark_node)
5171 ret = false;
5173 else if (fndecl && new_target)
5175 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5177 if (old_optimize != new_optimize)
5178 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5181 return ret;
5185 /* Hook to determine if one function can safely inline another. */
5187 static bool
5188 ix86_can_inline_p (tree caller, tree callee)
5190 bool ret = false;
5191 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5192 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5194 /* If callee has no option attributes, then it is ok to inline. */
5195 if (!callee_tree)
5196 ret = true;
5198 /* If the caller has no option attributes but the callee does, then it is
5199 not ok to inline. */
5200 else if (!caller_tree)
5201 ret = false;
5203 else
5205 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5206 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5208 /* The callee's isa options should be a subset of the caller's, i.e. an SSE4
5209 function can inline an SSE2 function, but an SSE2 function can't inline
5210 an SSE4 function. */
5211 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5212 != callee_opts->x_ix86_isa_flags)
5213 ret = false;
5215 /* See if we have the same non-isa options. */
5216 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5217 ret = false;
5219 /* See if arch, tune, etc. are the same. */
5220 else if (caller_opts->arch != callee_opts->arch)
5221 ret = false;
5223 else if (caller_opts->tune != callee_opts->tune)
5224 ret = false;
5226 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5227 ret = false;
5229 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5230 ret = false;
5232 else
5233 ret = true;
5236 return ret;
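/* An illustrative consequence of the subset rule above (hypothetical
   declarations):

       __attribute__((target ("sse4.2"))) static int f (void);
       __attribute__((target ("sse4.2,avx2"))) int g (void);

   g's ISA flags are a superset of f's, so f may be inlined into g; a plain
   function compiled without SSE4.2 could not inline f.  */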
5240 /* Remember the last target of ix86_set_current_function. */
5241 static GTY(()) tree ix86_previous_fndecl;
5243 /* Set target globals to the default (or current #pragma GCC target
5244 if active). Invalidate the ix86_previous_fndecl cache. */
5246 void
5247 ix86_reset_previous_fndecl (void)
5249 tree new_tree = target_option_current_node;
5250 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5251 if (TREE_TARGET_GLOBALS (new_tree))
5252 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5253 else if (new_tree == target_option_default_node)
5254 restore_target_globals (&default_target_globals);
5255 else
5256 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5257 ix86_previous_fndecl = NULL_TREE;
5260 /* Establish appropriate back-end context for processing the function
5261 FNDECL. The argument might be NULL to indicate processing at top
5262 level, outside of any function scope. */
5263 static void
5264 ix86_set_current_function (tree fndecl)
5266 /* Only change the context if the function changes. This hook is called
5267 several times in the course of compiling a function, and we don't want to
5268 slow things down too much or call target_reinit when it isn't safe. */
5269 if (fndecl == ix86_previous_fndecl)
5270 return;
5272 tree old_tree;
5273 if (ix86_previous_fndecl == NULL_TREE)
5274 old_tree = target_option_current_node;
5275 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5276 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5277 else
5278 old_tree = target_option_default_node;
5280 if (fndecl == NULL_TREE)
5282 if (old_tree != target_option_current_node)
5283 ix86_reset_previous_fndecl ();
5284 return;
5287 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5288 if (new_tree == NULL_TREE)
5289 new_tree = target_option_default_node;
5291 if (old_tree != new_tree)
5293 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5294 if (TREE_TARGET_GLOBALS (new_tree))
5295 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5296 else if (new_tree == target_option_default_node)
5297 restore_target_globals (&default_target_globals);
5298 else
5299 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5301 ix86_previous_fndecl = fndecl;
5305 /* Return true if this goes in large data/bss. */
5307 static bool
5308 ix86_in_large_data_p (tree exp)
5310 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5311 return false;
5313 /* Functions are never large data. */
5314 if (TREE_CODE (exp) == FUNCTION_DECL)
5315 return false;
5317 /* Automatic variables are never large data. */
5318 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5319 return false;
5321 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5323 const char *section = DECL_SECTION_NAME (exp);
5324 if (strcmp (section, ".ldata") == 0
5325 || strcmp (section, ".lbss") == 0)
5326 return true;
5327 return false;
5329 else
5331 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5333 /* If this is an incomplete type with size 0, then we can't put it
5334 in data because it might be too big when completed. Also,
5335 int_size_in_bytes returns -1 if the size can vary or is larger than
5336 an integer, in which case it is also safer to assume that it goes into
5337 large data. */
5338 if (size <= 0 || size > ix86_section_threshold)
5339 return true;
5342 return false;
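/* ix86_section_threshold above is controlled by -mlarge-data-threshold=, so
   e.g. compiling with (illustrative invocation)

       gcc -mcmodel=medium -mlarge-data-threshold=4096 foo.c

   places global objects larger than 4 KiB into the .ldata/.lbss sections
   handled below.  */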
5345 /* Switch to the appropriate section for output of DECL.
5346 DECL is either a `VAR_DECL' node or a constant of some sort.
5347 RELOC indicates whether forming the initial value of DECL requires
5348 link-time relocations. */
5350 ATTRIBUTE_UNUSED static section *
5351 x86_64_elf_select_section (tree decl, int reloc,
5352 unsigned HOST_WIDE_INT align)
5354 if (ix86_in_large_data_p (decl))
5356 const char *sname = NULL;
5357 unsigned int flags = SECTION_WRITE;
5358 switch (categorize_decl_for_section (decl, reloc))
5360 case SECCAT_DATA:
5361 sname = ".ldata";
5362 break;
5363 case SECCAT_DATA_REL:
5364 sname = ".ldata.rel";
5365 break;
5366 case SECCAT_DATA_REL_LOCAL:
5367 sname = ".ldata.rel.local";
5368 break;
5369 case SECCAT_DATA_REL_RO:
5370 sname = ".ldata.rel.ro";
5371 break;
5372 case SECCAT_DATA_REL_RO_LOCAL:
5373 sname = ".ldata.rel.ro.local";
5374 break;
5375 case SECCAT_BSS:
5376 sname = ".lbss";
5377 flags |= SECTION_BSS;
5378 break;
5379 case SECCAT_RODATA:
5380 case SECCAT_RODATA_MERGE_STR:
5381 case SECCAT_RODATA_MERGE_STR_INIT:
5382 case SECCAT_RODATA_MERGE_CONST:
5383 sname = ".lrodata";
5384 flags = 0;
5385 break;
5386 case SECCAT_SRODATA:
5387 case SECCAT_SDATA:
5388 case SECCAT_SBSS:
5389 gcc_unreachable ();
5390 case SECCAT_TEXT:
5391 case SECCAT_TDATA:
5392 case SECCAT_TBSS:
5393 /* We don't split these for the medium model. Place them into
5394 default sections and hope for the best. */
5395 break;
5397 if (sname)
5399 /* We might get called with string constants, but get_named_section
5400 doesn't like them as they are not DECLs. Also, we need to set
5401 flags in that case. */
5402 if (!DECL_P (decl))
5403 return get_section (sname, flags, NULL);
5404 return get_named_section (decl, sname, reloc);
5407 return default_elf_select_section (decl, reloc, align);
5410 /* Select a set of attributes for section NAME based on the properties
5411 of DECL and whether or not RELOC indicates that DECL's initializer
5412 might contain runtime relocations. */
5414 static unsigned int ATTRIBUTE_UNUSED
5415 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5417 unsigned int flags = default_section_type_flags (decl, name, reloc);
5419 if (decl == NULL_TREE
5420 && (strcmp (name, ".ldata.rel.ro") == 0
5421 || strcmp (name, ".ldata.rel.ro.local") == 0))
5422 flags |= SECTION_RELRO;
5424 if (strcmp (name, ".lbss") == 0
5425 || strncmp (name, ".lbss.", 6) == 0
5426 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5427 flags |= SECTION_BSS;
5429 return flags;
5432 /* Build up a unique section name, expressed as a
5433 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5434 RELOC indicates whether the initial value of DECL requires
5435 link-time relocations. */
5437 static void ATTRIBUTE_UNUSED
5438 x86_64_elf_unique_section (tree decl, int reloc)
5440 if (ix86_in_large_data_p (decl))
5442 const char *prefix = NULL;
5443 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5444 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5446 switch (categorize_decl_for_section (decl, reloc))
5448 case SECCAT_DATA:
5449 case SECCAT_DATA_REL:
5450 case SECCAT_DATA_REL_LOCAL:
5451 case SECCAT_DATA_REL_RO:
5452 case SECCAT_DATA_REL_RO_LOCAL:
5453 prefix = one_only ? ".ld" : ".ldata";
5454 break;
5455 case SECCAT_BSS:
5456 prefix = one_only ? ".lb" : ".lbss";
5457 break;
5458 case SECCAT_RODATA:
5459 case SECCAT_RODATA_MERGE_STR:
5460 case SECCAT_RODATA_MERGE_STR_INIT:
5461 case SECCAT_RODATA_MERGE_CONST:
5462 prefix = one_only ? ".lr" : ".lrodata";
5463 break;
5464 case SECCAT_SRODATA:
5465 case SECCAT_SDATA:
5466 case SECCAT_SBSS:
5467 gcc_unreachable ();
5468 case SECCAT_TEXT:
5469 case SECCAT_TDATA:
5470 case SECCAT_TBSS:
5471 /* We don't split these for the medium model. Place them into
5472 default sections and hope for the best. */
5473 break;
5475 if (prefix)
5477 const char *name, *linkonce;
5478 char *string;
5480 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5481 name = targetm.strip_name_encoding (name);
5483 /* If we're using one_only, then there needs to be a .gnu.linkonce
5484 prefix to the section name. */
5485 linkonce = one_only ? ".gnu.linkonce" : "";
5487 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5489 set_decl_section_name (decl, string);
5490 return;
5493 default_unique_section (decl, reloc);
5496 #ifdef COMMON_ASM_OP
5497 /* This says how to output assembler code to declare an
5498 uninitialized external linkage data object.
5500 For medium-model x86-64 we need to use the .largecomm directive for
5501 large objects. */
5502 void
5503 x86_elf_aligned_common (FILE *file,
5504 const char *name, unsigned HOST_WIDE_INT size,
5505 int align)
5507 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5508 && size > (unsigned int)ix86_section_threshold)
5509 fputs ("\t.largecomm\t", file);
5510 else
5511 fputs (COMMON_ASM_OP, file);
5512 assemble_name (file, name);
5513 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5514 size, align / BITS_PER_UNIT);
5516 #endif
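/* Sample assembler output from the function above (a sketch, with made-up
   object names): a common object above the threshold under -mcmodel=medium
   becomes

       .largecomm  big_buf,1048576,32

   while a small one uses the ordinary directive

       .comm       small_buf,64,32

   where the last operand is the alignment in bytes, as computed from
   align / BITS_PER_UNIT.  */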
5518 /* Utility function for targets to use in implementing
5519 ASM_OUTPUT_ALIGNED_BSS. */
5521 void
5522 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5523 unsigned HOST_WIDE_INT size, int align)
5525 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5526 && size > (unsigned int)ix86_section_threshold)
5527 switch_to_section (get_named_section (decl, ".lbss", 0));
5528 else
5529 switch_to_section (bss_section);
5530 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5531 #ifdef ASM_DECLARE_OBJECT_NAME
5532 last_assemble_variable_decl = decl;
5533 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5534 #else
5535 /* The standard thing is to just output a label for the object. */
5536 ASM_OUTPUT_LABEL (file, name);
5537 #endif /* ASM_DECLARE_OBJECT_NAME */
5538 ASM_OUTPUT_SKIP (file, size ? size : 1);
5541 /* Decide whether we must probe the stack before any space allocation
5542 on this target. It's essentially TARGET_STACK_PROBE except when
5543 -fstack-check causes the stack to be already probed differently. */
5545 bool
5546 ix86_target_stack_probe (void)
5548 /* Do not probe the stack twice if static stack checking is enabled. */
5549 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5550 return false;
5552 return TARGET_STACK_PROBE;
5555 /* Decide whether we can make a sibling call to a function. DECL is the
5556 declaration of the function being targeted by the call and EXP is the
5557 CALL_EXPR representing the call. */
5559 static bool
5560 ix86_function_ok_for_sibcall (tree decl, tree exp)
5562 tree type, decl_or_type;
5563 rtx a, b;
5565 /* If we are generating position-independent code, we cannot sibcall
5566 optimize direct calls to global functions, as the PLT requires
5567 %ebx be live. (Darwin does not have a PLT.) */
5568 if (!TARGET_MACHO
5569 && !TARGET_64BIT
5570 && flag_pic
5571 && flag_plt
5572 && decl && !targetm.binds_local_p (decl))
5573 return false;
5575 /* If we need to align the outgoing stack, then sibcalling would
5576 unalign the stack, which may break the called function. */
5577 if (ix86_minimum_incoming_stack_boundary (true)
5578 < PREFERRED_STACK_BOUNDARY)
5579 return false;
5581 if (decl)
5583 decl_or_type = decl;
5584 type = TREE_TYPE (decl);
5586 else
5588 /* We're looking at the CALL_EXPR, we need the type of the function. */
5589 type = CALL_EXPR_FN (exp); /* pointer expression */
5590 type = TREE_TYPE (type); /* pointer type */
5591 type = TREE_TYPE (type); /* function type */
5592 decl_or_type = type;
5595 /* Check that the return value locations are the same. Like
5596 if we are returning floats on the 80387 register stack, we cannot
5597 make a sibcall from a function that doesn't return a float to a
5598 function that does or, conversely, from a function that does return
5599 a float to a function that doesn't; the necessary stack adjustment
5600 would not be executed. This is also the place we notice
5601 differences in the return value ABI. Note that it is ok for one
5602 of the functions to have void return type as long as the return
5603 value of the other is passed in a register. */
5604 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5605 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5606 cfun->decl, false);
5607 if (STACK_REG_P (a) || STACK_REG_P (b))
5609 if (!rtx_equal_p (a, b))
5610 return false;
5612 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5614 else if (!rtx_equal_p (a, b))
5615 return false;
5617 if (TARGET_64BIT)
5619 /* The SYSV ABI has more call-clobbered registers;
5620 disallow sibcalls from MS to SYSV. */
5621 if (cfun->machine->call_abi == MS_ABI
5622 && ix86_function_type_abi (type) == SYSV_ABI)
5623 return false;
5625 else
5627 /* If this call is indirect, we'll need to be able to use a
5628 call-clobbered register for the address of the target function.
5629 Make sure that all such registers are not used for passing
5630 parameters. Note that DLLIMPORT functions are indirect. */
5631 if (!decl
5632 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5634 /* Check if regparm >= 3 since arg_reg_available is set to
5635 false if regparm == 0. If regparm is 1 or 2, there is
5636 always a call-clobbered register available.
5638 ??? The symbol indirect call doesn't need a call-clobbered
5639 register. But we don't know if this is a symbol indirect
5640 call or not here. */
5641 if (ix86_function_regparm (type, NULL) >= 3
5642 && !cfun->machine->arg_reg_available)
5643 return false;
5647 /* Otherwise okay. That also includes certain types of indirect calls. */
5648 return true;
5651 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5652 and "sseregparm" calling convention attributes;
5653 arguments as in struct attribute_spec.handler. */
5655 static tree
5656 ix86_handle_cconv_attribute (tree *node, tree name,
5657 tree args,
5658 int,
5659 bool *no_add_attrs)
5661 if (TREE_CODE (*node) != FUNCTION_TYPE
5662 && TREE_CODE (*node) != METHOD_TYPE
5663 && TREE_CODE (*node) != FIELD_DECL
5664 && TREE_CODE (*node) != TYPE_DECL)
5666 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5667 name);
5668 *no_add_attrs = true;
5669 return NULL_TREE;
5672 /* Can combine regparm with all attributes but fastcall and thiscall. */
5673 if (is_attribute_p ("regparm", name))
5675 tree cst;
5677 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5679 error ("fastcall and regparm attributes are not compatible");
5682 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5684 error ("regparam and thiscall attributes are not compatible");
5687 cst = TREE_VALUE (args);
5688 if (TREE_CODE (cst) != INTEGER_CST)
5690 warning (OPT_Wattributes,
5691 "%qE attribute requires an integer constant argument",
5692 name);
5693 *no_add_attrs = true;
5695 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5697 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5698 name, REGPARM_MAX);
5699 *no_add_attrs = true;
5702 return NULL_TREE;
5705 if (TARGET_64BIT)
5707 /* Do not warn when emulating the MS ABI. */
5708 if ((TREE_CODE (*node) != FUNCTION_TYPE
5709 && TREE_CODE (*node) != METHOD_TYPE)
5710 || ix86_function_type_abi (*node) != MS_ABI)
5711 warning (OPT_Wattributes, "%qE attribute ignored",
5712 name);
5713 *no_add_attrs = true;
5714 return NULL_TREE;
5717 /* Can combine fastcall only with sseregparm. */
5718 if (is_attribute_p ("fastcall", name))
5720 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5722 error ("fastcall and cdecl attributes are not compatible");
5724 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5726 error ("fastcall and stdcall attributes are not compatible");
5728 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5730 error ("fastcall and regparm attributes are not compatible");
5732 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5734 error ("fastcall and thiscall attributes are not compatible");
5738 /* Can combine stdcall with regparm and
5739 sseregparm. */
5740 else if (is_attribute_p ("stdcall", name))
5742 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5744 error ("stdcall and cdecl attributes are not compatible");
5746 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5748 error ("stdcall and fastcall attributes are not compatible");
5750 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5752 error ("stdcall and thiscall attributes are not compatible");
5756 /* Can combine cdecl with regparm and sseregparm. */
5757 else if (is_attribute_p ("cdecl", name))
5759 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5761 error ("stdcall and cdecl attributes are not compatible");
5763 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5765 error ("fastcall and cdecl attributes are not compatible");
5767 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5769 error ("cdecl and thiscall attributes are not compatible");
5772 else if (is_attribute_p ("thiscall", name))
5774 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5775 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5776 name);
5777 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5779 error ("stdcall and thiscall attributes are not compatible");
5781 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5783 error ("fastcall and thiscall attributes are not compatible");
5785 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5787 error ("cdecl and thiscall attributes are not compatible");
5791 /* Can combine sseregparm with all attributes. */
5793 return NULL_TREE;
5796 /* The transactional memory builtins are implicitly regparm or fastcall
5797 depending on the ABI. Override the generic do-nothing attribute that
5798 these builtins were declared with, and replace it with one of the two
5799 attributes that we expect elsewhere. */
5801 static tree
5802 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5803 int flags, bool *no_add_attrs)
5805 tree alt;
5807 /* In no case do we want to add the placeholder attribute. */
5808 *no_add_attrs = true;
5810 /* The 64-bit ABI is unchanged for transactional memory. */
5811 if (TARGET_64BIT)
5812 return NULL_TREE;
5814 /* ??? Is there a better way to validate 32-bit Windows? We have
5815 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5816 if (CHECK_STACK_LIMIT > 0)
5817 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5818 else
5820 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5821 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5823 decl_attributes (node, alt, flags);
5825 return NULL_TREE;
5828 /* This function determines the calling convention from TYPE. */
5830 unsigned int
5831 ix86_get_callcvt (const_tree type)
5833 unsigned int ret = 0;
5834 bool is_stdarg;
5835 tree attrs;
5837 if (TARGET_64BIT)
5838 return IX86_CALLCVT_CDECL;
5840 attrs = TYPE_ATTRIBUTES (type);
5841 if (attrs != NULL_TREE)
5843 if (lookup_attribute ("cdecl", attrs))
5844 ret |= IX86_CALLCVT_CDECL;
5845 else if (lookup_attribute ("stdcall", attrs))
5846 ret |= IX86_CALLCVT_STDCALL;
5847 else if (lookup_attribute ("fastcall", attrs))
5848 ret |= IX86_CALLCVT_FASTCALL;
5849 else if (lookup_attribute ("thiscall", attrs))
5850 ret |= IX86_CALLCVT_THISCALL;
5852 /* Regparm isn't allowed for thiscall and fastcall. */
5853 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5855 if (lookup_attribute ("regparm", attrs))
5856 ret |= IX86_CALLCVT_REGPARM;
5857 if (lookup_attribute ("sseregparm", attrs))
5858 ret |= IX86_CALLCVT_SSEREGPARM;
5861 if (IX86_BASE_CALLCVT(ret) != 0)
5862 return ret;
5865 is_stdarg = stdarg_p (type);
5866 if (TARGET_RTD && !is_stdarg)
5867 return IX86_CALLCVT_STDCALL | ret;
5869 if (ret != 0
5870 || is_stdarg
5871 || TREE_CODE (type) != METHOD_TYPE
5872 || ix86_function_type_abi (type) != MS_ABI)
5873 return IX86_CALLCVT_CDECL | ret;
5875 return IX86_CALLCVT_THISCALL;
5878 /* Return 0 if the attributes for two types are incompatible, 1 if they
5879 are compatible, and 2 if they are nearly compatible (which causes a
5880 warning to be generated). */
5882 static int
5883 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5885 unsigned int ccvt1, ccvt2;
5887 if (TREE_CODE (type1) != FUNCTION_TYPE
5888 && TREE_CODE (type1) != METHOD_TYPE)
5889 return 1;
5891 ccvt1 = ix86_get_callcvt (type1);
5892 ccvt2 = ix86_get_callcvt (type2);
5893 if (ccvt1 != ccvt2)
5894 return 0;
5895 if (ix86_function_regparm (type1, NULL)
5896 != ix86_function_regparm (type2, NULL))
5897 return 0;
5899 return 1;
5902 /* Return the regparm value for a function with the indicated TYPE and DECL.
5903 DECL may be NULL when calling the function indirectly
5904 or when considering a libcall. */
5906 static int
5907 ix86_function_regparm (const_tree type, const_tree decl)
5909 tree attr;
5910 int regparm;
5911 unsigned int ccvt;
5913 if (TARGET_64BIT)
5914 return (ix86_function_type_abi (type) == SYSV_ABI
5915 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5916 ccvt = ix86_get_callcvt (type);
5917 regparm = ix86_regparm;
5919 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5921 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5922 if (attr)
5924 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5925 return regparm;
5928 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5929 return 2;
5930 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5931 return 1;
5933 /* Use register calling convention for local functions when possible. */
5934 if (decl
5935 && TREE_CODE (decl) == FUNCTION_DECL)
5937 cgraph_node *target = cgraph_node::get (decl);
5938 if (target)
5939 target = target->function_symbol ();
5941 /* Caller and callee must agree on the calling convention, so
5942 checking just the current function's optimize setting here would mean
5943 that with __attribute__((optimize (...))) the caller could use the
5944 regparm convention and the callee not, or vice versa. Instead look at
5945 whether the callee itself is optimized or not. */
5946 if (target && opt_for_fn (target->decl, optimize)
5947 && !(profile_flag && !flag_fentry))
5949 cgraph_local_info *i = &target->local;
5950 if (i && i->local && i->can_change_signature)
5952 int local_regparm, globals = 0, regno;
5954 /* Make sure no regparm register is taken by a
5955 fixed register variable. */
5956 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5957 local_regparm++)
5958 if (fixed_regs[local_regparm])
5959 break;
5961 /* We don't want to use regparm(3) for nested functions as
5962 these use a static chain pointer in the third argument. */
5963 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5964 local_regparm = 2;
5966 /* Save a register for the split stack. */
5967 if (local_regparm == 3 && flag_split_stack)
5968 local_regparm = 2;
5970 /* Each fixed register usage increases register pressure,
5971 so fewer registers should be used for argument passing.
5972 This functionality can be overridden by an explicit
5973 regparm value. */
5974 for (regno = AX_REG; regno <= DI_REG; regno++)
5975 if (fixed_regs[regno])
5976 globals++;
5978 local_regparm
5979 = globals < local_regparm ? local_regparm - globals : 0;
5981 if (local_regparm > regparm)
5982 regparm = local_regparm;
5987 return regparm;
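/* Worked example of the heuristic above: with three argument registers
   nominally available, pinning one of them with a global register variable
   (illustrative declaration)

       register int counter asm ("ebx");

   makes fixed_regs count it, so globals becomes 1 and a local function is
   given regparm(2) instead of regparm(3).  */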
5990 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5991 DFmode (2) arguments in SSE registers for a function with the
5992 indicated TYPE and DECL. DECL may be NULL when calling the function
5993 indirectly or when considering a libcall. Return -1 if any FP parameter
5994 should be rejected with an error; this is used in situations where we
5995 imply the SSE calling convention but the function is called from another
5996 function with SSE disabled. Otherwise return 0. */
5998 static int
5999 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6001 gcc_assert (!TARGET_64BIT);
6003 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6004 by the sseregparm attribute. */
6005 if (TARGET_SSEREGPARM
6006 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6008 if (!TARGET_SSE)
6010 if (warn)
6012 if (decl)
6013 error ("calling %qD with attribute sseregparm without "
6014 "SSE/SSE2 enabled", decl);
6015 else
6016 error ("calling %qT with attribute sseregparm without "
6017 "SSE/SSE2 enabled", type);
6019 return 0;
6022 return 2;
6025 if (!decl)
6026 return 0;
6028 cgraph_node *target = cgraph_node::get (decl);
6029 if (target)
6030 target = target->function_symbol ();
6032 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6033 (and DFmode for SSE2) arguments in SSE registers. */
6034 if (target
6035 /* TARGET_SSE_MATH */
6036 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6037 && opt_for_fn (target->decl, optimize)
6038 && !(profile_flag && !flag_fentry))
6040 cgraph_local_info *i = &target->local;
6041 if (i && i->local && i->can_change_signature)
6043 /* Refuse to produce wrong code when a local function with SSE enabled
6044 is called from an SSE-disabled function.
6045 FIXME: We need a way to detect these cases across ltrans partitions
6046 and avoid using SSE calling conventions on local functions called
6047 from functions with SSE disabled. For now at least delay the
6048 warning until we know we are going to produce wrong code.
6049 See PR66047. */
6050 if (!TARGET_SSE && warn)
6051 return -1;
6052 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6053 ->x_ix86_isa_flags) ? 2 : 1;
6057 return 0;
6060 /* Return true if EAX is live at the start of the function. Used by
6061 ix86_expand_prologue to determine if we need special help before
6062 calling allocate_stack_worker. */
6064 static bool
6065 ix86_eax_live_at_start_p (void)
6067 /* Cheat. Don't bother working forward from ix86_function_regparm
6068 to the function type to whether an actual argument is located in
6069 eax. Instead just look at cfg info, which is still close enough
6070 to correct at this point. This gives false positives for broken
6071 functions that might use uninitialized data that happens to be
6072 allocated in eax, but who cares? */
6073 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6076 static bool
6077 ix86_keep_aggregate_return_pointer (tree fntype)
6079 tree attr;
6081 if (!TARGET_64BIT)
6083 attr = lookup_attribute ("callee_pop_aggregate_return",
6084 TYPE_ATTRIBUTES (fntype));
6085 if (attr)
6086 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6088 /* For the 32-bit MS ABI the default is to keep the aggregate
6089 return pointer. */
6090 if (ix86_function_type_abi (fntype) == MS_ABI)
6091 return true;
6093 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6096 /* Value is the number of bytes of arguments automatically
6097 popped when returning from a subroutine call.
6098 FUNDECL is the declaration node of the function (as a tree),
6099 FUNTYPE is the data type of the function (as a tree),
6100 or for a library call it is an identifier node for the subroutine name.
6101 SIZE is the number of bytes of arguments passed on the stack.
6103 On the 80386, the RTD insn may be used to pop them if the number
6104 of args is fixed, but if the number is variable then the caller
6105 must pop them all. RTD can't be used for library calls now
6106 because the library is compiled with the Unix compiler.
6107 Use of RTD is a selectable option, since it is incompatible with
6108 standard Unix calling sequences. If the option is not selected,
6109 the caller must always pop the args.
6111 The attribute stdcall is equivalent to RTD on a per module basis. */
6113 static int
6114 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6116 unsigned int ccvt;
6118 /* None of the 64-bit ABIs pop arguments. */
6119 if (TARGET_64BIT)
6120 return 0;
6122 ccvt = ix86_get_callcvt (funtype);
6124 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6125 | IX86_CALLCVT_THISCALL)) != 0
6126 && ! stdarg_p (funtype))
6127 return size;
6129 /* Lose any fake structure return argument if it is passed on the stack. */
6130 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6131 && !ix86_keep_aggregate_return_pointer (funtype))
6133 int nregs = ix86_function_regparm (funtype, fundecl);
6134 if (nregs == 0)
6135 return GET_MODE_SIZE (Pmode);
6138 return 0;
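/* Example of the stdcall case above (illustrative 32-bit declaration):

       __attribute__((stdcall)) int add2 (int a, int b);

   add2 has a fixed argument list, so ix86_return_pops_args returns 8 and
   the callee pops its own arguments with a "ret $8" instruction.  */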
6141 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6143 static bool
6144 ix86_legitimate_combined_insn (rtx_insn *insn)
6146 /* Check operand constraints in case hard registers were propagated
6147 into insn pattern. This check prevents combine pass from
6148 generating insn patterns with invalid hard register operands.
6149 These invalid insns can eventually confuse reload to error out
6150 with a spill failure. See also PRs 46829 and 46843. */
6151 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6153 int i;
6155 extract_insn (insn);
6156 preprocess_constraints (insn);
6158 int n_operands = recog_data.n_operands;
6159 int n_alternatives = recog_data.n_alternatives;
6160 for (i = 0; i < n_operands; i++)
6162 rtx op = recog_data.operand[i];
6163 machine_mode mode = GET_MODE (op);
6164 const operand_alternative *op_alt;
6165 int offset = 0;
6166 bool win;
6167 int j;
6169 /* For pre-AVX disallow unaligned loads/stores where the
6170 instructions don't support it. */
6171 if (!TARGET_AVX
6172 && VECTOR_MODE_P (GET_MODE (op))
6173 && misaligned_operand (op, GET_MODE (op)))
6175 int min_align = get_attr_ssememalign (insn);
6176 if (min_align == 0)
6177 return false;
6180 /* A unary operator may be accepted by the predicate, but it
6181 is irrelevant for matching constraints. */
6182 if (UNARY_P (op))
6183 op = XEXP (op, 0);
6185 if (SUBREG_P (op))
6187 if (REG_P (SUBREG_REG (op))
6188 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6189 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6190 GET_MODE (SUBREG_REG (op)),
6191 SUBREG_BYTE (op),
6192 GET_MODE (op));
6193 op = SUBREG_REG (op);
6196 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6197 continue;
6199 op_alt = recog_op_alt;
6201 /* Operand has no constraints, anything is OK. */
6202 win = !n_alternatives;
6204 alternative_mask preferred = get_preferred_alternatives (insn);
6205 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6207 if (!TEST_BIT (preferred, j))
6208 continue;
6209 if (op_alt[i].anything_ok
6210 || (op_alt[i].matches != -1
6211 && operands_match_p
6212 (recog_data.operand[i],
6213 recog_data.operand[op_alt[i].matches]))
6214 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6216 win = true;
6217 break;
6221 if (!win)
6222 return false;
6226 return true;
6229 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6231 static unsigned HOST_WIDE_INT
6232 ix86_asan_shadow_offset (void)
6234 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6235 : HOST_WIDE_INT_C (0x7fff8000))
6236 : (HOST_WIDE_INT_1 << 29);
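/* Illustrative sketch, not part of GCC: how AddressSanitizer-instrumented
   code uses the offset returned above.  ASan maps every 8 bytes of
   application memory to one shadow byte at (addr >> 3) + offset.  */

static inline unsigned HOST_WIDE_INT
asan_shadow_address_sketch (unsigned HOST_WIDE_INT addr,
			    unsigned HOST_WIDE_INT offset)
{
  /* Shift by 3: one shadow byte covers an 8-byte granule.  */
  return (addr >> 3) + offset;
}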
6239 /* Argument support functions. */
6241 /* Return true when a register may be used to pass function parameters. */
6242 bool
6243 ix86_function_arg_regno_p (int regno)
6245 int i;
6246 enum calling_abi call_abi;
6247 const int *parm_regs;
6249 if (TARGET_MPX && BND_REGNO_P (regno))
6250 return true;
6252 if (!TARGET_64BIT)
6254 if (TARGET_MACHO)
6255 return (regno < REGPARM_MAX
6256 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6257 else
6258 return (regno < REGPARM_MAX
6259 || (TARGET_MMX && MMX_REGNO_P (regno)
6260 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6261 || (TARGET_SSE && SSE_REGNO_P (regno)
6262 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6265 if (TARGET_SSE && SSE_REGNO_P (regno)
6266 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6267 return true;
6269 /* TODO: The function should depend on current function ABI but
6270 builtins.c would need updating then. Therefore we use the
6271 default ABI. */
6272 call_abi = ix86_cfun_abi ();
6274 /* RAX is used as a hidden argument to varargs functions. */
6275 if (call_abi == SYSV_ABI && regno == AX_REG)
6276 return true;
6278 if (call_abi == MS_ABI)
6279 parm_regs = x86_64_ms_abi_int_parameter_registers;
6280 else
6281 parm_regs = x86_64_int_parameter_registers;
6283 for (i = 0; i < (call_abi == MS_ABI
6284 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6285 if (regno == parm_regs[i])
6286 return true;
6287 return false;
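/* For reference, the 64-bit integer argument registers checked above are
   RDI, RSI, RDX, RCX, R8 and R9 for the SysV ABI
   (x86_64_int_parameter_registers) and RCX, RDX, R8 and R9 for the
   Microsoft ABI (x86_64_ms_abi_int_parameter_registers).  */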
6290 /* Return true if we do not know how to pass TYPE solely in registers. */
6292 static bool
6293 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6295 if (must_pass_in_stack_var_size_or_pad (mode, type))
6296 return true;
6298 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6299 The layout_type routine is crafty and tries to trick us into passing
6300 currently unsupported vector types on the stack by using TImode. */
6301 return (!TARGET_64BIT && mode == TImode
6302 && type && TREE_CODE (type) != VECTOR_TYPE);
6305 /* Return the size, in bytes, of the area reserved for arguments passed
6306 in registers for the function represented by FNDECL, depending on the
6307 ABI used. */
6308 int
6309 ix86_reg_parm_stack_space (const_tree fndecl)
6311 enum calling_abi call_abi = SYSV_ABI;
6312 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6313 call_abi = ix86_function_abi (fndecl);
6314 else
6315 call_abi = ix86_function_type_abi (fndecl);
6316 if (TARGET_64BIT && call_abi == MS_ABI)
6317 return 32;
6318 return 0;
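/* For illustration: the 32 bytes reserved under the 64-bit MS ABI are
   the "home area" a caller allocates for the four register parameters,
   e.g.

     subq  $40, %rsp   # 32-byte home area + 8 for stack alignment
     call  foo

   so the callee may spill RCX/RDX/R8/R9 into known stack slots. */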
6321 /* We add this as a workaround so that the libc_has_function
6322 hook can be used in i386.md. */
6323 bool
6324 ix86_libc_has_function (enum function_class fn_class)
6326 return targetm.libc_has_function (fn_class);
6329 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
6330 specifying the calling ABI used. */
6331 enum calling_abi
6332 ix86_function_type_abi (const_tree fntype)
6334 enum calling_abi abi = ix86_abi;
6336 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
6337 return abi;
6339 if (abi == SYSV_ABI
6340 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6342 if (TARGET_X32)
6343 error ("X32 does not support ms_abi attribute");
6345 abi = MS_ABI;
6347 else if (abi == MS_ABI
6348 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6349 abi = SYSV_ABI;
6351 return abi;
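/* For illustration: the attribute lookups above correspond to
   declarations such as

     int f (void) __attribute__ ((ms_abi));      - forces MS_ABI
     int g (void) __attribute__ ((sysv_abi));    - forces SYSV_ABI

   which override the default ix86_abi for the individual function
   type. */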
6354 static enum calling_abi
6355 ix86_function_abi (const_tree fndecl)
6357 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
6360 /* Return SYSV_ABI or MS_ABI, depending on cfun,
6361 specifying the calling ABI used. */
6362 enum calling_abi
6363 ix86_cfun_abi (void)
6365 return cfun ? cfun->machine->call_abi : ix86_abi;
6368 static bool
6369 ix86_function_ms_hook_prologue (const_tree fn)
6371 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6373 if (decl_function_context (fn) != NULL_TREE)
6374 error_at (DECL_SOURCE_LOCATION (fn),
6375 "ms_hook_prologue is not compatible with nested function");
6376 else
6377 return true;
6379 return false;
6382 /* Write the extra assembler code needed to declare a function properly. */
6384 void
6385 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6386 tree decl)
6388 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6390 if (is_ms_hook)
6392 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6393 unsigned int filler_cc = 0xcccccccc;
6395 for (i = 0; i < filler_count; i += 4)
6396 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6399 #ifdef SUBTARGET_ASM_UNWIND_INIT
6400 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6401 #endif
6403 ASM_OUTPUT_LABEL (asm_out_file, fname);
6405 /* Output magic byte marker, if hot-patch attribute is set. */
6406 if (is_ms_hook)
6408 if (TARGET_64BIT)
6410 /* leaq [%rsp + 0], %rsp */
6411 asm_fprintf (asm_out_file, ASM_BYTE
6412 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6414 else
6416 /* movl.s %edi, %edi
6417 push %ebp
6418 movl.s %esp, %ebp */
6419 asm_fprintf (asm_out_file, ASM_BYTE
6420 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6425 /* regclass.c */
6426 extern void init_regs (void);
6428 /* Implementation of the call ABI switching target hook. The call-used
6429 register sets specific to FNDECL are selected. See also
6430 ix86_conditional_register_usage for more details. */
6431 void
6432 ix86_call_abi_override (const_tree fndecl)
6434 cfun->machine->call_abi = ix86_function_abi (fndecl);
6437 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6438 Avoid the expensive re-initialization via init_regs each time we switch
6439 function context, since it is needed only during RTL expansion. */
6440 static void
6441 ix86_maybe_switch_abi (void)
6443 if (TARGET_64BIT &&
6444 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6445 reinit_regs ();
6448 /* Return true if a pseudo register should be created and used to hold
6449 the GOT address for PIC code. */
6450 bool
6451 ix86_use_pseudo_pic_reg (void)
6453 if ((TARGET_64BIT
6454 && (ix86_cmodel == CM_SMALL_PIC
6455 || TARGET_PECOFF))
6456 || !flag_pic)
6457 return false;
6458 return true;
6461 /* Initialize large model PIC register. */
6463 static void
6464 ix86_init_large_pic_reg (unsigned int tmp_regno)
6466 rtx_code_label *label;
6467 rtx tmp_reg;
6469 gcc_assert (Pmode == DImode);
6470 label = gen_label_rtx ();
6471 emit_label (label);
6472 LABEL_PRESERVE_P (label) = 1;
6473 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6474 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6475 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6476 label));
6477 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6478 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6479 pic_offset_table_rtx, tmp_reg));
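/* A sketch of the sequence emitted above (the destination register is
   illustrative; %r11 is the tmp_regno actually passed by the caller):

     .L1: leaq    .L1(%rip), %rbx
          movabsq $_GLOBAL_OFFSET_TABLE_-.L1, %r11
          addq    %r11, %rbx

   leaving the GOT address in the PIC register. */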
6482 /* Create and initialize PIC register if required. */
6483 static void
6484 ix86_init_pic_reg (void)
6486 edge entry_edge;
6487 rtx_insn *seq;
6489 if (!ix86_use_pseudo_pic_reg ())
6490 return;
6492 start_sequence ();
6494 if (TARGET_64BIT)
6496 if (ix86_cmodel == CM_LARGE_PIC)
6497 ix86_init_large_pic_reg (R11_REG);
6498 else
6499 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6501 else
6503 /* If there will be an mcount call in the function, it is more profitable
6504 to emit the SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6505 rtx reg = crtl->profile
6506 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6507 : pic_offset_table_rtx;
6508 rtx_insn *insn = emit_insn (gen_set_got (reg));
6509 RTX_FRAME_RELATED_P (insn) = 1;
6510 if (crtl->profile)
6511 emit_move_insn (pic_offset_table_rtx, reg);
6512 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6515 seq = get_insns ();
6516 end_sequence ();
6518 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6519 insert_insn_on_edge (seq, entry_edge);
6520 commit_one_edge_insertion (entry_edge);
6523 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6524 for a call to a function whose data type is FNTYPE.
6525 For a library call, FNTYPE is 0. */
6527 void
6528 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6529 tree fntype, /* tree ptr for function decl */
6530 rtx libname, /* SYMBOL_REF of library name or 0 */
6531 tree fndecl,
6532 int caller)
6534 struct cgraph_local_info *i = NULL;
6535 struct cgraph_node *target = NULL;
6537 memset (cum, 0, sizeof (*cum));
6539 if (fndecl)
6541 target = cgraph_node::get (fndecl);
6542 if (target)
6544 target = target->function_symbol ();
6545 i = cgraph_node::local_info (target->decl);
6546 cum->call_abi = ix86_function_abi (target->decl);
6548 else
6549 cum->call_abi = ix86_function_abi (fndecl);
6551 else
6552 cum->call_abi = ix86_function_type_abi (fntype);
6554 cum->caller = caller;
6556 /* Set up the number of registers to use for passing arguments. */
6557 cum->nregs = ix86_regparm;
6558 if (TARGET_64BIT)
6560 cum->nregs = (cum->call_abi == SYSV_ABI
6561 ? X86_64_REGPARM_MAX
6562 : X86_64_MS_REGPARM_MAX);
6564 if (TARGET_SSE)
6566 cum->sse_nregs = SSE_REGPARM_MAX;
6567 if (TARGET_64BIT)
6569 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6570 ? X86_64_SSE_REGPARM_MAX
6571 : X86_64_MS_SSE_REGPARM_MAX);
6574 if (TARGET_MMX)
6575 cum->mmx_nregs = MMX_REGPARM_MAX;
6576 cum->warn_avx512f = true;
6577 cum->warn_avx = true;
6578 cum->warn_sse = true;
6579 cum->warn_mmx = true;
6581 /* Because types might mismatch between the caller and the callee, we
6582 need to use the actual type of the function for local calls.
6583 FIXME: cgraph_analyze can be told to actually record if function uses
6584 va_start so for local functions maybe_vaarg can be made aggressive
6585 helping K&R code.
6586 FIXME: once the type system is fixed, we won't need this code anymore. */
6587 if (i && i->local && i->can_change_signature)
6588 fntype = TREE_TYPE (target->decl);
6589 cum->stdarg = stdarg_p (fntype);
6590 cum->maybe_vaarg = (fntype
6591 ? (!prototype_p (fntype) || stdarg_p (fntype))
6592 : !libname);
6594 cum->bnd_regno = FIRST_BND_REG;
6595 cum->bnds_in_bt = 0;
6596 cum->force_bnd_pass = 0;
6597 cum->decl = fndecl;
6599 if (!TARGET_64BIT)
6601 /* If there are variable arguments, then we won't pass anything
6602 in registers in 32-bit mode. */
6603 if (stdarg_p (fntype))
6605 cum->nregs = 0;
6606 /* Since in 32-bit mode variable arguments are always passed on the
6607 stack, there is a scratch register available for indirect
6608 sibcalls. */
6609 cfun->machine->arg_reg_available = true;
6610 cum->sse_nregs = 0;
6611 cum->mmx_nregs = 0;
6612 cum->warn_avx512f = false;
6613 cum->warn_avx = false;
6614 cum->warn_sse = false;
6615 cum->warn_mmx = false;
6616 return;
6619 /* Use the ecx and edx registers if the function has the fastcall
6620 attribute, else look for regparm information. */
6621 if (fntype)
6623 unsigned int ccvt = ix86_get_callcvt (fntype);
6624 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6626 cum->nregs = 1;
6627 cum->fastcall = 1; /* Same first register as in fastcall. */
6629 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6631 cum->nregs = 2;
6632 cum->fastcall = 1;
6634 else
6635 cum->nregs = ix86_function_regparm (fntype, fndecl);
6638 /* Set up the number of SSE registers used for passing SFmode
6639 and DFmode arguments. Warn for mismatching ABI. */
6640 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6643 cfun->machine->arg_reg_available = (cum->nregs > 0);
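/* For illustration: with the 32-bit conventions set up above,

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   receives A in %ecx and B in %edx (cum->nregs == 2) with C pushed on
   the stack, while thiscall reserves only %ecx (cum->nregs == 1) for
   the this pointer. */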
6646 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6647 But in the case of vector types, it is some vector mode.
6649 When we have only some of our vector isa extensions enabled, then there
6650 are some modes for which vector_mode_supported_p is false. For these
6651 modes, the generic vector support in gcc will choose some non-vector mode
6652 in order to implement the type. By computing the natural mode, we'll
6653 select the proper ABI location for the operand and not depend on whatever
6654 the middle-end decides to do with these vector types.
6656 The middle-end can't deal with vector types larger than 16 bytes. In
6657 that case, we return the original mode and warn about the ABI change
6658 if CUM isn't NULL.
6660 If IN_RETURN is true, warn about the ABI change if the vector mode
6661 isn't available for the function return value. */
6663 static machine_mode
6664 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6665 bool in_return)
6667 machine_mode mode = TYPE_MODE (type);
6669 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6671 HOST_WIDE_INT size = int_size_in_bytes (type);
6672 if ((size == 8 || size == 16 || size == 32 || size == 64)
6673 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6674 && TYPE_VECTOR_SUBPARTS (type) > 1)
6676 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6678 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6679 mode = MIN_MODE_VECTOR_FLOAT;
6680 else
6681 mode = MIN_MODE_VECTOR_INT;
6683 /* Get the mode which has this inner mode and number of units. */
6684 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6685 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6686 && GET_MODE_INNER (mode) == innermode)
6688 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
6690 static bool warnedavx512f;
6691 static bool warnedavx512f_ret;
6693 if (cum && cum->warn_avx512f && !warnedavx512f)
6695 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6696 "without AVX512F enabled changes the ABI"))
6697 warnedavx512f = true;
6699 else if (in_return && !warnedavx512f_ret)
6701 if (warning (OPT_Wpsabi, "AVX512F vector return "
6702 "without AVX512F enabled changes the ABI"))
6703 warnedavx512f_ret = true;
6706 return TYPE_MODE (type);
6708 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
6710 static bool warnedavx;
6711 static bool warnedavx_ret;
6713 if (cum && cum->warn_avx && !warnedavx)
6715 if (warning (OPT_Wpsabi, "AVX vector argument "
6716 "without AVX enabled changes the ABI"))
6717 warnedavx = true;
6719 else if (in_return && !warnedavx_ret)
6721 if (warning (OPT_Wpsabi, "AVX vector return "
6722 "without AVX enabled changes the ABI"))
6723 warnedavx_ret = true;
6726 return TYPE_MODE (type);
6728 else if (((size == 8 && TARGET_64BIT) || size == 16)
6729 && !TARGET_SSE
6730 && !TARGET_IAMCU)
6732 static bool warnedsse;
6733 static bool warnedsse_ret;
6735 if (cum && cum->warn_sse && !warnedsse)
6737 if (warning (OPT_Wpsabi, "SSE vector argument "
6738 "without SSE enabled changes the ABI"))
6739 warnedsse = true;
6741 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6743 if (warning (OPT_Wpsabi, "SSE vector return "
6744 "without SSE enabled changes the ABI"))
6745 warnedsse_ret = true;
6748 else if ((size == 8 && !TARGET_64BIT)
6749 && !TARGET_MMX
6750 && !TARGET_IAMCU)
6752 static bool warnedmmx;
6753 static bool warnedmmx_ret;
6755 if (cum && cum->warn_mmx && !warnedmmx)
6757 if (warning (OPT_Wpsabi, "MMX vector argument "
6758 "without MMX enabled changes the ABI"))
6759 warnedmmx = true;
6761 else if (in_return && !warnedmmx_ret)
6763 if (warning (OPT_Wpsabi, "MMX vector return "
6764 "without MMX enabled changes the ABI"))
6765 warnedmmx_ret = true;
6768 return mode;
6771 gcc_unreachable ();
6775 return mode;
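/* For illustration: given

     typedef int v8si __attribute__ ((vector_size (32)));

   compiled without -mavx, the loop above finds V8SImode, but since AVX
   is disabled the function warns (-Wpsabi) and returns the original
   non-vector TYPE_MODE, so the argument keeps its old ABI location. */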
6778 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6779 this may not agree with the mode that the type system has chosen for the
6780 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6781 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6783 static rtx
6784 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6785 unsigned int regno)
6787 rtx tmp;
6789 if (orig_mode != BLKmode)
6790 tmp = gen_rtx_REG (orig_mode, regno);
6791 else
6793 tmp = gen_rtx_REG (mode, regno);
6794 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6795 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6798 return tmp;
6801 /* x86-64 register passing implementation. See the x86-64 psABI for details.
6802 The goal of this code is to classify each eightbyte of an incoming argument
6803 by register class and assign registers accordingly. */
6805 /* Return the union class of CLASS1 and CLASS2.
6806 See the x86-64 psABI for details. */
6808 static enum x86_64_reg_class
6809 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6811 /* Rule #1: If both classes are equal, this is the resulting class. */
6812 if (class1 == class2)
6813 return class1;
6815 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6816 the other class. */
6817 if (class1 == X86_64_NO_CLASS)
6818 return class2;
6819 if (class2 == X86_64_NO_CLASS)
6820 return class1;
6822 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6823 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6824 return X86_64_MEMORY_CLASS;
6826 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6827 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6828 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6829 return X86_64_INTEGERSI_CLASS;
6830 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6831 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6832 return X86_64_INTEGER_CLASS;
6834 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6835 MEMORY is used. */
6836 if (class1 == X86_64_X87_CLASS
6837 || class1 == X86_64_X87UP_CLASS
6838 || class1 == X86_64_COMPLEX_X87_CLASS
6839 || class2 == X86_64_X87_CLASS
6840 || class2 == X86_64_X87UP_CLASS
6841 || class2 == X86_64_COMPLEX_X87_CLASS)
6842 return X86_64_MEMORY_CLASS;
6844 /* Rule #6: Otherwise class SSE is used. */
6845 return X86_64_SSE_CLASS;
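/* For illustration, classifying

     struct s { long l; double d; };

   yields INTEGER for the first eightbyte and SSE for the second, so
   the struct travels in one integer and one SSE register; a union of
   long and double instead merges both classes into INTEGER by rule
   #4. */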
6848 /* Classify the argument of type TYPE and mode MODE.
6849 CLASSES will be filled by the register class used to pass each word
6850 of the operand. The number of words is returned. In case the parameter
6851 should be passed in memory, 0 is returned. As a special case for zero
6852 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6854 BIT_OFFSET is used internally for handling records; it specifies the
6855 offset in bits, modulo 512, to avoid overflow cases.
6857 See the x86-64 psABI for details. */
6860 static int
6861 classify_argument (machine_mode mode, const_tree type,
6862 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6864 HOST_WIDE_INT bytes =
6865 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6866 int words
6867 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6869 /* Variable sized entities are always passed/returned in memory. */
6870 if (bytes < 0)
6871 return 0;
6873 if (mode != VOIDmode
6874 && targetm.calls.must_pass_in_stack (mode, type))
6875 return 0;
6877 if (type && AGGREGATE_TYPE_P (type))
6879 int i;
6880 tree field;
6881 enum x86_64_reg_class subclasses[MAX_CLASSES];
6883 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6884 if (bytes > 64)
6885 return 0;
6887 for (i = 0; i < words; i++)
6888 classes[i] = X86_64_NO_CLASS;
6890 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6891 signal the memory class, so handle this as a special case. */
6892 if (!words)
6894 classes[0] = X86_64_NO_CLASS;
6895 return 1;
6898 /* Classify each field of record and merge classes. */
6899 switch (TREE_CODE (type))
6901 case RECORD_TYPE:
6902 /* And now merge the fields of the structure. */
6903 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6905 if (TREE_CODE (field) == FIELD_DECL)
6907 int num;
6909 if (TREE_TYPE (field) == error_mark_node)
6910 continue;
6912 /* Bitfields are always classified as integer. Handle them
6913 early, since later code would consider them to be
6914 misaligned integers. */
6915 if (DECL_BIT_FIELD (field))
6917 for (i = (int_bit_position (field)
6918 + (bit_offset % 64)) / 8 / 8;
6919 i < ((int_bit_position (field) + (bit_offset % 64))
6920 + tree_to_shwi (DECL_SIZE (field))
6921 + 63) / 8 / 8; i++)
6922 classes[i] =
6923 merge_classes (X86_64_INTEGER_CLASS,
6924 classes[i]);
6926 else
6928 int pos;
6930 type = TREE_TYPE (field);
6932 /* A flexible array member is ignored. */
6933 if (TYPE_MODE (type) == BLKmode
6934 && TREE_CODE (type) == ARRAY_TYPE
6935 && TYPE_SIZE (type) == NULL_TREE
6936 && TYPE_DOMAIN (type) != NULL_TREE
6937 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6938 == NULL_TREE))
6940 static bool warned;
6942 if (!warned && warn_psabi)
6944 warned = true;
6945 inform (input_location,
6946 "the ABI of passing struct with"
6947 " a flexible array member has"
6948 " changed in GCC 4.4");
6950 continue;
6952 num = classify_argument (TYPE_MODE (type), type,
6953 subclasses,
6954 (int_bit_position (field)
6955 + bit_offset) % 512);
6956 if (!num)
6957 return 0;
6958 pos = (int_bit_position (field)
6959 + (bit_offset % 64)) / 8 / 8;
6960 for (i = 0; i < num && (i + pos) < words; i++)
6961 classes[i + pos] =
6962 merge_classes (subclasses[i], classes[i + pos]);
6966 break;
6968 case ARRAY_TYPE:
6969 /* Arrays are handled as small records. */
6971 int num;
6972 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6973 TREE_TYPE (type), subclasses, bit_offset);
6974 if (!num)
6975 return 0;
6977 /* The partial classes are now full classes. */
6978 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6979 subclasses[0] = X86_64_SSE_CLASS;
6980 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6981 && !((bit_offset % 64) == 0 && bytes == 4))
6982 subclasses[0] = X86_64_INTEGER_CLASS;
6984 for (i = 0; i < words; i++)
6985 classes[i] = subclasses[i % num];
6987 break;
6989 case UNION_TYPE:
6990 case QUAL_UNION_TYPE:
6991 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6993 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6995 if (TREE_CODE (field) == FIELD_DECL)
6997 int num;
6999 if (TREE_TYPE (field) == error_mark_node)
7000 continue;
7002 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7003 TREE_TYPE (field), subclasses,
7004 bit_offset);
7005 if (!num)
7006 return 0;
7007 for (i = 0; i < num && i < words; i++)
7008 classes[i] = merge_classes (subclasses[i], classes[i]);
7011 break;
7013 default:
7014 gcc_unreachable ();
7017 if (words > 2)
7019 /* When the size exceeds 16 bytes, if the first class isn't
7020 X86_64_SSE_CLASS or any of the others isn't
7021 X86_64_SSEUP_CLASS, everything should be passed in
7022 memory. */
7023 if (classes[0] != X86_64_SSE_CLASS)
7024 return 0;
7026 for (i = 1; i < words; i++)
7027 if (classes[i] != X86_64_SSEUP_CLASS)
7028 return 0;
7031 /* Final merger cleanup. */
7032 for (i = 0; i < words; i++)
7034 /* If one class is MEMORY, everything should be passed in
7035 memory. */
7036 if (classes[i] == X86_64_MEMORY_CLASS)
7037 return 0;
7039 /* X86_64_SSEUP_CLASS should always be preceded by
7040 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7041 if (classes[i] == X86_64_SSEUP_CLASS
7042 && classes[i - 1] != X86_64_SSE_CLASS
7043 && classes[i - 1] != X86_64_SSEUP_CLASS)
7045 /* The first one should never be X86_64_SSEUP_CLASS. */
7046 gcc_assert (i != 0);
7047 classes[i] = X86_64_SSE_CLASS;
7050 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7051 everything should be passed in memory. */
7052 if (classes[i] == X86_64_X87UP_CLASS
7053 && (classes[i - 1] != X86_64_X87_CLASS))
7055 static bool warned;
7057 /* The first one should never be X86_64_X87UP_CLASS. */
7058 gcc_assert (i != 0);
7059 if (!warned && warn_psabi)
7061 warned = true;
7062 inform (input_location,
7063 "the ABI of passing union with long double"
7064 " has changed in GCC 4.4");
7066 return 0;
7069 return words;
7072 /* Compute the alignment needed. We align all types to their natural
7073 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
7074 if (mode != VOIDmode && mode != BLKmode)
7076 int mode_alignment = GET_MODE_BITSIZE (mode);
7078 if (mode == XFmode)
7079 mode_alignment = 128;
7080 else if (mode == XCmode)
7081 mode_alignment = 256;
7082 if (COMPLEX_MODE_P (mode))
7083 mode_alignment /= 2;
7084 /* Misaligned fields are always returned in memory. */
7085 if (bit_offset % mode_alignment)
7086 return 0;
7089 /* For V1xx modes, just use the base mode. */
7090 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7091 && GET_MODE_UNIT_SIZE (mode) == bytes)
7092 mode = GET_MODE_INNER (mode);
7094 /* Classification of atomic types. */
7095 switch (mode)
7097 case SDmode:
7098 case DDmode:
7099 classes[0] = X86_64_SSE_CLASS;
7100 return 1;
7101 case TDmode:
7102 classes[0] = X86_64_SSE_CLASS;
7103 classes[1] = X86_64_SSEUP_CLASS;
7104 return 2;
7105 case DImode:
7106 case SImode:
7107 case HImode:
7108 case QImode:
7109 case CSImode:
7110 case CHImode:
7111 case CQImode:
7113 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7115 /* Analyze last 128 bits only. */
7116 size = (size - 1) & 0x7f;
7118 if (size < 32)
7120 classes[0] = X86_64_INTEGERSI_CLASS;
7121 return 1;
7123 else if (size < 64)
7125 classes[0] = X86_64_INTEGER_CLASS;
7126 return 1;
7128 else if (size < 64+32)
7130 classes[0] = X86_64_INTEGER_CLASS;
7131 classes[1] = X86_64_INTEGERSI_CLASS;
7132 return 2;
7134 else if (size < 64+64)
7136 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7137 return 2;
7139 else
7140 gcc_unreachable ();
7142 case CDImode:
7143 case TImode:
7144 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7145 return 2;
7146 case COImode:
7147 case OImode:
7148 /* OImode shouldn't be used directly. */
7149 gcc_unreachable ();
7150 case CTImode:
7151 return 0;
7152 case SFmode:
7153 if (!(bit_offset % 64))
7154 classes[0] = X86_64_SSESF_CLASS;
7155 else
7156 classes[0] = X86_64_SSE_CLASS;
7157 return 1;
7158 case DFmode:
7159 classes[0] = X86_64_SSEDF_CLASS;
7160 return 1;
7161 case XFmode:
7162 classes[0] = X86_64_X87_CLASS;
7163 classes[1] = X86_64_X87UP_CLASS;
7164 return 2;
7165 case TFmode:
7166 classes[0] = X86_64_SSE_CLASS;
7167 classes[1] = X86_64_SSEUP_CLASS;
7168 return 2;
7169 case SCmode:
7170 classes[0] = X86_64_SSE_CLASS;
7171 if (!(bit_offset % 64))
7172 return 1;
7173 else
7175 static bool warned;
7177 if (!warned && warn_psabi)
7179 warned = true;
7180 inform (input_location,
7181 "the ABI of passing structure with complex float"
7182 " member has changed in GCC 4.4");
7184 classes[1] = X86_64_SSESF_CLASS;
7185 return 2;
7187 case DCmode:
7188 classes[0] = X86_64_SSEDF_CLASS;
7189 classes[1] = X86_64_SSEDF_CLASS;
7190 return 2;
7191 case XCmode:
7192 classes[0] = X86_64_COMPLEX_X87_CLASS;
7193 return 1;
7194 case TCmode:
7195 /* This mode is larger than 16 bytes. */
7196 return 0;
7197 case V8SFmode:
7198 case V8SImode:
7199 case V32QImode:
7200 case V16HImode:
7201 case V4DFmode:
7202 case V4DImode:
7203 classes[0] = X86_64_SSE_CLASS;
7204 classes[1] = X86_64_SSEUP_CLASS;
7205 classes[2] = X86_64_SSEUP_CLASS;
7206 classes[3] = X86_64_SSEUP_CLASS;
7207 return 4;
7208 case V8DFmode:
7209 case V16SFmode:
7210 case V8DImode:
7211 case V16SImode:
7212 case V32HImode:
7213 case V64QImode:
7214 classes[0] = X86_64_SSE_CLASS;
7215 classes[1] = X86_64_SSEUP_CLASS;
7216 classes[2] = X86_64_SSEUP_CLASS;
7217 classes[3] = X86_64_SSEUP_CLASS;
7218 classes[4] = X86_64_SSEUP_CLASS;
7219 classes[5] = X86_64_SSEUP_CLASS;
7220 classes[6] = X86_64_SSEUP_CLASS;
7221 classes[7] = X86_64_SSEUP_CLASS;
7222 return 8;
7223 case V4SFmode:
7224 case V4SImode:
7225 case V16QImode:
7226 case V8HImode:
7227 case V2DFmode:
7228 case V2DImode:
7229 classes[0] = X86_64_SSE_CLASS;
7230 classes[1] = X86_64_SSEUP_CLASS;
7231 return 2;
7232 case V1TImode:
7233 case V1DImode:
7234 case V2SFmode:
7235 case V2SImode:
7236 case V4HImode:
7237 case V8QImode:
7238 classes[0] = X86_64_SSE_CLASS;
7239 return 1;
7240 case BLKmode:
7241 case VOIDmode:
7242 return 0;
7243 default:
7244 gcc_assert (VECTOR_MODE_P (mode));
7246 if (bytes > 16)
7247 return 0;
7249 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7251 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7252 classes[0] = X86_64_INTEGERSI_CLASS;
7253 else
7254 classes[0] = X86_64_INTEGER_CLASS;
7255 classes[1] = X86_64_INTEGER_CLASS;
7256 return 1 + (bytes > 8);
7260 /* Examine the argument and set the number of registers required in each
7261 class. Return true iff the parameter should be passed in memory. */
7263 static bool
7264 examine_argument (machine_mode mode, const_tree type, int in_return,
7265 int *int_nregs, int *sse_nregs)
7267 enum x86_64_reg_class regclass[MAX_CLASSES];
7268 int n = classify_argument (mode, type, regclass, 0);
7270 *int_nregs = 0;
7271 *sse_nregs = 0;
7273 if (!n)
7274 return true;
7275 for (n--; n >= 0; n--)
7276 switch (regclass[n])
7278 case X86_64_INTEGER_CLASS:
7279 case X86_64_INTEGERSI_CLASS:
7280 (*int_nregs)++;
7281 break;
7282 case X86_64_SSE_CLASS:
7283 case X86_64_SSESF_CLASS:
7284 case X86_64_SSEDF_CLASS:
7285 (*sse_nregs)++;
7286 break;
7287 case X86_64_NO_CLASS:
7288 case X86_64_SSEUP_CLASS:
7289 break;
7290 case X86_64_X87_CLASS:
7291 case X86_64_X87UP_CLASS:
7292 case X86_64_COMPLEX_X87_CLASS:
7293 if (!in_return)
7294 return true;
7295 break;
7296 case X86_64_MEMORY_CLASS:
7297 gcc_unreachable ();
7300 return false;
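/* For illustration: for the struct s { long l; double d; } example,
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1 and returns
   false, meaning the argument fits in registers. */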
7303 /* Construct container for the argument used by GCC interface. See
7304 FUNCTION_ARG for the detailed description. */
7306 static rtx
7307 construct_container (machine_mode mode, machine_mode orig_mode,
7308 const_tree type, int in_return, int nintregs, int nsseregs,
7309 const int *intreg, int sse_regno)
7311 /* The following variables hold the static issued_error state. */
7312 static bool issued_sse_arg_error;
7313 static bool issued_sse_ret_error;
7314 static bool issued_x87_ret_error;
7316 machine_mode tmpmode;
7317 int bytes =
7318 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7319 enum x86_64_reg_class regclass[MAX_CLASSES];
7320 int n;
7321 int i;
7322 int nexps = 0;
7323 int needed_sseregs, needed_intregs;
7324 rtx exp[MAX_CLASSES];
7325 rtx ret;
7327 n = classify_argument (mode, type, regclass, 0);
7328 if (!n)
7329 return NULL;
7330 if (examine_argument (mode, type, in_return, &needed_intregs,
7331 &needed_sseregs))
7332 return NULL;
7333 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7334 return NULL;
7336 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7337 some less clueful developer tries to use floating-point anyway. */
7338 if (needed_sseregs && !TARGET_SSE)
7340 if (in_return)
7342 if (!issued_sse_ret_error)
7344 error ("SSE register return with SSE disabled");
7345 issued_sse_ret_error = true;
7348 else if (!issued_sse_arg_error)
7350 error ("SSE register argument with SSE disabled");
7351 issued_sse_arg_error = true;
7353 return NULL;
7356 /* Likewise, error if the ABI requires us to return values in the
7357 x87 registers and the user specified -mno-80387. */
7358 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7359 for (i = 0; i < n; i++)
7360 if (regclass[i] == X86_64_X87_CLASS
7361 || regclass[i] == X86_64_X87UP_CLASS
7362 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7364 if (!issued_x87_ret_error)
7366 error ("x87 register return with x87 disabled");
7367 issued_x87_ret_error = true;
7369 return NULL;
7372 /* First, construct the simple cases. Avoid SCmode, since we want to use
7373 a single register to pass this type. */
7374 if (n == 1 && mode != SCmode)
7375 switch (regclass[0])
7377 case X86_64_INTEGER_CLASS:
7378 case X86_64_INTEGERSI_CLASS:
7379 return gen_rtx_REG (mode, intreg[0]);
7380 case X86_64_SSE_CLASS:
7381 case X86_64_SSESF_CLASS:
7382 case X86_64_SSEDF_CLASS:
7383 if (mode != BLKmode)
7384 return gen_reg_or_parallel (mode, orig_mode,
7385 SSE_REGNO (sse_regno));
7386 break;
7387 case X86_64_X87_CLASS:
7388 case X86_64_COMPLEX_X87_CLASS:
7389 return gen_rtx_REG (mode, FIRST_STACK_REG);
7390 case X86_64_NO_CLASS:
7391 /* Zero sized array, struct or class. */
7392 return NULL;
7393 default:
7394 gcc_unreachable ();
7396 if (n == 2
7397 && regclass[0] == X86_64_SSE_CLASS
7398 && regclass[1] == X86_64_SSEUP_CLASS
7399 && mode != BLKmode)
7400 return gen_reg_or_parallel (mode, orig_mode,
7401 SSE_REGNO (sse_regno));
7402 if (n == 4
7403 && regclass[0] == X86_64_SSE_CLASS
7404 && regclass[1] == X86_64_SSEUP_CLASS
7405 && regclass[2] == X86_64_SSEUP_CLASS
7406 && regclass[3] == X86_64_SSEUP_CLASS
7407 && mode != BLKmode)
7408 return gen_reg_or_parallel (mode, orig_mode,
7409 SSE_REGNO (sse_regno));
7410 if (n == 8
7411 && regclass[0] == X86_64_SSE_CLASS
7412 && regclass[1] == X86_64_SSEUP_CLASS
7413 && regclass[2] == X86_64_SSEUP_CLASS
7414 && regclass[3] == X86_64_SSEUP_CLASS
7415 && regclass[4] == X86_64_SSEUP_CLASS
7416 && regclass[5] == X86_64_SSEUP_CLASS
7417 && regclass[6] == X86_64_SSEUP_CLASS
7418 && regclass[7] == X86_64_SSEUP_CLASS
7419 && mode != BLKmode)
7420 return gen_reg_or_parallel (mode, orig_mode,
7421 SSE_REGNO (sse_regno));
7422 if (n == 2
7423 && regclass[0] == X86_64_X87_CLASS
7424 && regclass[1] == X86_64_X87UP_CLASS)
7425 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7427 if (n == 2
7428 && regclass[0] == X86_64_INTEGER_CLASS
7429 && regclass[1] == X86_64_INTEGER_CLASS
7430 && (mode == CDImode || mode == TImode)
7431 && intreg[0] + 1 == intreg[1])
7432 return gen_rtx_REG (mode, intreg[0]);
7434 /* Otherwise figure out the entries of the PARALLEL. */
7435 for (i = 0; i < n; i++)
7437 int pos;
7439 switch (regclass[i])
7441 case X86_64_NO_CLASS:
7442 break;
7443 case X86_64_INTEGER_CLASS:
7444 case X86_64_INTEGERSI_CLASS:
7445 /* Merge TImodes on aligned occasions here too. */
7446 if (i * 8 + 8 > bytes)
7447 tmpmode
7448 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7449 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7450 tmpmode = SImode;
7451 else
7452 tmpmode = DImode;
7453 /* We've requested 24 bytes, which we
7454 don't have a mode for. Use DImode. */
7455 if (tmpmode == BLKmode)
7456 tmpmode = DImode;
7457 exp [nexps++]
7458 = gen_rtx_EXPR_LIST (VOIDmode,
7459 gen_rtx_REG (tmpmode, *intreg),
7460 GEN_INT (i*8));
7461 intreg++;
7462 break;
7463 case X86_64_SSESF_CLASS:
7464 exp [nexps++]
7465 = gen_rtx_EXPR_LIST (VOIDmode,
7466 gen_rtx_REG (SFmode,
7467 SSE_REGNO (sse_regno)),
7468 GEN_INT (i*8));
7469 sse_regno++;
7470 break;
7471 case X86_64_SSEDF_CLASS:
7472 exp [nexps++]
7473 = gen_rtx_EXPR_LIST (VOIDmode,
7474 gen_rtx_REG (DFmode,
7475 SSE_REGNO (sse_regno)),
7476 GEN_INT (i*8));
7477 sse_regno++;
7478 break;
7479 case X86_64_SSE_CLASS:
7480 pos = i;
7481 switch (n)
7483 case 1:
7484 tmpmode = DImode;
7485 break;
7486 case 2:
7487 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7489 tmpmode = TImode;
7490 i++;
7492 else
7493 tmpmode = DImode;
7494 break;
7495 case 4:
7496 gcc_assert (i == 0
7497 && regclass[1] == X86_64_SSEUP_CLASS
7498 && regclass[2] == X86_64_SSEUP_CLASS
7499 && regclass[3] == X86_64_SSEUP_CLASS);
7500 tmpmode = OImode;
7501 i += 3;
7502 break;
7503 case 8:
7504 gcc_assert (i == 0
7505 && regclass[1] == X86_64_SSEUP_CLASS
7506 && regclass[2] == X86_64_SSEUP_CLASS
7507 && regclass[3] == X86_64_SSEUP_CLASS
7508 && regclass[4] == X86_64_SSEUP_CLASS
7509 && regclass[5] == X86_64_SSEUP_CLASS
7510 && regclass[6] == X86_64_SSEUP_CLASS
7511 && regclass[7] == X86_64_SSEUP_CLASS);
7512 tmpmode = XImode;
7513 i += 7;
7514 break;
7515 default:
7516 gcc_unreachable ();
7518 exp [nexps++]
7519 = gen_rtx_EXPR_LIST (VOIDmode,
7520 gen_rtx_REG (tmpmode,
7521 SSE_REGNO (sse_regno)),
7522 GEN_INT (pos*8));
7523 sse_regno++;
7524 break;
7525 default:
7526 gcc_unreachable ();
7530 /* Empty aligned struct, union or class. */
7531 if (nexps == 0)
7532 return NULL;
7534 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7535 for (i = 0; i < nexps; i++)
7536 XVECEXP (ret, 0, i) = exp [i];
7537 return ret;
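/* For illustration: for struct s { double d; long l; } passed as an
   argument, the container built above looks roughly like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   recording which piece of the value lives in which register and at
   what byte offset. */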
7540 /* Update the data in CUM to advance over an argument of mode MODE
7541 and data type TYPE. (TYPE is null for libcalls where that information
7542 may not be available.)
7544 Return the number of integer registers advanced over. */
7546 static int
7547 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7548 const_tree type, HOST_WIDE_INT bytes,
7549 HOST_WIDE_INT words)
7551 int res = 0;
7552 bool error_p = false;
7554 if (TARGET_IAMCU)
7556 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7557 bytes in registers. */
7558 if (!VECTOR_MODE_P (mode) && bytes <= 8)
7559 goto pass_in_reg;
7560 return res;
7563 switch (mode)
7565 default:
7566 break;
7568 case BLKmode:
7569 if (bytes < 0)
7570 break;
7571 /* FALLTHRU */
7573 case DImode:
7574 case SImode:
7575 case HImode:
7576 case QImode:
7577 pass_in_reg:
7578 cum->words += words;
7579 cum->nregs -= words;
7580 cum->regno += words;
7581 if (cum->nregs >= 0)
7582 res = words;
7583 if (cum->nregs <= 0)
7585 cum->nregs = 0;
7586 cfun->machine->arg_reg_available = false;
7587 cum->regno = 0;
7589 break;
7591 case OImode:
7592 /* OImode shouldn't be used directly. */
7593 gcc_unreachable ();
7595 case DFmode:
7596 if (cum->float_in_sse == -1)
7597 error_p = 1;
7598 if (cum->float_in_sse < 2)
7599 break;
/* FALLTHRU */
7600 case SFmode:
7601 if (cum->float_in_sse == -1)
7602 error_p = 1;
7603 if (cum->float_in_sse < 1)
7604 break;
7605 /* FALLTHRU */
7607 case V8SFmode:
7608 case V8SImode:
7609 case V64QImode:
7610 case V32HImode:
7611 case V16SImode:
7612 case V8DImode:
7613 case V16SFmode:
7614 case V8DFmode:
7615 case V32QImode:
7616 case V16HImode:
7617 case V4DFmode:
7618 case V4DImode:
7619 case TImode:
7620 case V16QImode:
7621 case V8HImode:
7622 case V4SImode:
7623 case V2DImode:
7624 case V4SFmode:
7625 case V2DFmode:
7626 if (!type || !AGGREGATE_TYPE_P (type))
7628 cum->sse_words += words;
7629 cum->sse_nregs -= 1;
7630 cum->sse_regno += 1;
7631 if (cum->sse_nregs <= 0)
7633 cum->sse_nregs = 0;
7634 cum->sse_regno = 0;
7637 break;
7639 case V8QImode:
7640 case V4HImode:
7641 case V2SImode:
7642 case V2SFmode:
7643 case V1TImode:
7644 case V1DImode:
7645 if (!type || !AGGREGATE_TYPE_P (type))
7647 cum->mmx_words += words;
7648 cum->mmx_nregs -= 1;
7649 cum->mmx_regno += 1;
7650 if (cum->mmx_nregs <= 0)
7652 cum->mmx_nregs = 0;
7653 cum->mmx_regno = 0;
7656 break;
7658 if (error_p)
7660 cum->float_in_sse = 0;
7661 error ("calling %qD with SSE calling convention without "
7662 "SSE/SSE2 enabled", cum->decl);
7663 sorry ("this is a GCC bug that can be worked around by adding "
7664 "attribute used to function called");
7667 return res;
7670 static int
7671 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7672 const_tree type, HOST_WIDE_INT words, bool named)
7674 int int_nregs, sse_nregs;
7676 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7677 if (!named && (VALID_AVX512F_REG_MODE (mode)
7678 || VALID_AVX256_REG_MODE (mode)))
7679 return 0;
7681 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7682 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7684 cum->nregs -= int_nregs;
7685 cum->sse_nregs -= sse_nregs;
7686 cum->regno += int_nregs;
7687 cum->sse_regno += sse_nregs;
7688 return int_nregs;
7690 else
7692 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7693 cum->words = (cum->words + align - 1) & ~(align - 1);
7694 cum->words += words;
7695 return 0;
7699 static int
7700 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7701 HOST_WIDE_INT words)
7703 /* Otherwise, this should be passed indirectly. */
7704 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7706 cum->words += words;
7707 if (cum->nregs > 0)
7709 cum->nregs -= 1;
7710 cum->regno += 1;
7711 return 1;
7713 return 0;
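/* For illustration: unlike the SYSV logic above, the MS ABI consumes
   one parameter slot per argument regardless of its class, so for

     void f (int a, double b);

   A occupies slot 0 (%rcx) and B slot 1 (%xmm1), never %xmm0. */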
7716 /* Update the data in CUM to advance over an argument of mode MODE and
7717 data type TYPE. (TYPE is null for libcalls where that information
7718 may not be available.) */
7720 static void
7721 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7722 const_tree type, bool named)
7724 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7725 HOST_WIDE_INT bytes, words;
7726 int nregs;
7728 if (mode == BLKmode)
7729 bytes = int_size_in_bytes (type);
7730 else
7731 bytes = GET_MODE_SIZE (mode);
7732 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7734 if (type)
7735 mode = type_natural_mode (type, NULL, false);
7737 if ((type && POINTER_BOUNDS_TYPE_P (type))
7738 || POINTER_BOUNDS_MODE_P (mode))
7740 /* If we pass bounds in the Bounds Table (BT), just update the remaining bounds count. */
7741 if (cum->bnds_in_bt)
7743 cum->bnds_in_bt--;
7744 return;
7748 /* Update the remaining number of bounds to force. */
7748 if (cum->force_bnd_pass)
7749 cum->force_bnd_pass--;
7751 cum->bnd_regno++;
7753 return;
7756 /* The first arg not going to Bounds Tables resets this counter. */
7757 cum->bnds_in_bt = 0;
7758 /* For unnamed args we always pass bounds to avoid bounds mismatches when
7759 the passed and received types do not match. If bounds do not follow an
7760 unnamed arg, still pretend the required number of bounds was passed. */
7761 if (cum->force_bnd_pass)
7763 cum->bnd_regno += cum->force_bnd_pass;
7764 cum->force_bnd_pass = 0;
7767 if (TARGET_64BIT)
7769 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7771 if (call_abi == MS_ABI)
7772 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7773 else
7774 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7776 else
7777 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7779 /* For stdarg we expect bounds to be passed for each value passed
7780 in a register. */
7781 if (cum->stdarg)
7782 cum->force_bnd_pass = nregs;
7783 /* For pointers passed in memory we expect bounds to be passed in the
7784 Bounds Table. */
7785 if (!nregs)
7786 cum->bnds_in_bt = chkp_type_bounds_count (type);
7789 /* Define where to put the arguments to a function.
7790 Value is zero to push the argument on the stack,
7791 or a hard register in which to store the argument.
7793 MODE is the argument's machine mode.
7794 TYPE is the data type of the argument (as a tree).
7795 This is null for libcalls where that information may
7796 not be available.
7797 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7798 the preceding args and about the function being called.
7799 NAMED is nonzero if this argument is a named parameter
7800 (otherwise it is an extra parameter matching an ellipsis). */
7802 static rtx
7803 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7804 machine_mode orig_mode, const_tree type,
7805 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7807 bool error_p = false;
7808 /* Avoid the AL settings for the Unix64 ABI. */
7809 if (mode == VOIDmode)
7810 return constm1_rtx;
7812 if (TARGET_IAMCU)
7814 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7815 bytes in registers. */
7816 if (!VECTOR_MODE_P (mode) && bytes <= 8)
7817 goto pass_in_reg;
7818 return NULL_RTX;
7821 switch (mode)
7823 default:
7824 break;
7826 case BLKmode:
7827 if (bytes < 0)
7828 break;
7829 /* FALLTHRU */
7830 case DImode:
7831 case SImode:
7832 case HImode:
7833 case QImode:
7834 pass_in_reg:
7835 if (words <= cum->nregs)
7837 int regno = cum->regno;
7839 /* Fastcall allocates the first two DWORD (SImode) or
7840 smaller arguments to ECX and EDX if the argument isn't an
7841 aggregate type. */
7842 if (cum->fastcall)
7844 if (mode == BLKmode
7845 || mode == DImode
7846 || (type && AGGREGATE_TYPE_P (type)))
7847 break;
7849 /* ECX, not EAX, is the first allocated register. */
7850 if (regno == AX_REG)
7851 regno = CX_REG;
7853 return gen_rtx_REG (mode, regno);
7855 break;
7857 case DFmode:
7858 if (cum->float_in_sse == -1)
7859 error_p = 1;
7860 if (cum->float_in_sse < 2)
7861 break;
/* FALLTHRU */
7862 case SFmode:
7863 if (cum->float_in_sse == -1)
7864 error_p = 1;
7865 if (cum->float_in_sse < 1)
7866 break;
7867 /* FALLTHRU */
7868 case TImode:
7869 /* In 32-bit mode, we pass TImode in xmm registers. */
7870 case V16QImode:
7871 case V8HImode:
7872 case V4SImode:
7873 case V2DImode:
7874 case V4SFmode:
7875 case V2DFmode:
7876 if (!type || !AGGREGATE_TYPE_P (type))
7878 if (cum->sse_nregs)
7879 return gen_reg_or_parallel (mode, orig_mode,
7880 cum->sse_regno + FIRST_SSE_REG);
7882 break;
7884 case OImode:
7885 case XImode:
7886 /* OImode and XImode shouldn't be used directly. */
7887 gcc_unreachable ();
7889 case V64QImode:
7890 case V32HImode:
7891 case V16SImode:
7892 case V8DImode:
7893 case V16SFmode:
7894 case V8DFmode:
7895 case V8SFmode:
7896 case V8SImode:
7897 case V32QImode:
7898 case V16HImode:
7899 case V4DFmode:
7900 case V4DImode:
7901 if (!type || !AGGREGATE_TYPE_P (type))
7903 if (cum->sse_nregs)
7904 return gen_reg_or_parallel (mode, orig_mode,
7905 cum->sse_regno + FIRST_SSE_REG);
7907 break;
7909 case V8QImode:
7910 case V4HImode:
7911 case V2SImode:
7912 case V2SFmode:
7913 case V1TImode:
7914 case V1DImode:
7915 if (!type || !AGGREGATE_TYPE_P (type))
7917 if (cum->mmx_nregs)
7918 return gen_reg_or_parallel (mode, orig_mode,
7919 cum->mmx_regno + FIRST_MMX_REG);
7921 break;
7923 if (error_p)
7925 cum->float_in_sse = 0;
7926 error ("calling %qD with SSE calling convention without "
7927 "SSE/SSE2 enabled", cum->decl);
7928 sorry ("this is a GCC bug that can be worked around by adding "
7929 "attribute used to function called");
7932 return NULL_RTX;
7935 static rtx
7936 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7937 machine_mode orig_mode, const_tree type, bool named)
7939 /* Handle a hidden AL argument containing number of registers
7940 for varargs x86-64 functions. */
7941 if (mode == VOIDmode)
7942 return GEN_INT (cum->maybe_vaarg
7943 ? (cum->sse_nregs < 0
7944 ? X86_64_SSE_REGPARM_MAX
7945 : cum->sse_regno)
7946 : -1);
7948 switch (mode)
7950 default:
7951 break;
7953 case V8SFmode:
7954 case V8SImode:
7955 case V32QImode:
7956 case V16HImode:
7957 case V4DFmode:
7958 case V4DImode:
7959 case V16SFmode:
7960 case V16SImode:
7961 case V64QImode:
7962 case V32HImode:
7963 case V8DFmode:
7964 case V8DImode:
7965 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7966 if (!named)
7967 return NULL;
7968 break;
7971 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7972 cum->sse_nregs,
7973 &x86_64_int_parameter_registers [cum->regno],
7974 cum->sse_regno);
7977 static rtx
7978 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7979 machine_mode orig_mode, bool named,
7980 HOST_WIDE_INT bytes)
7982 unsigned int regno;
7984 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7985 We use the value -2 to specify that the current function call is MS_ABI. */
7986 if (mode == VOIDmode)
7987 return GEN_INT (-2);
7989 /* If we've run out of registers, it goes on the stack. */
7990 if (cum->nregs == 0)
7991 return NULL_RTX;
7993 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7995 /* Only floating point modes are passed in anything but integer regs. */
7996 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7998 if (named)
7999 regno = cum->regno + FIRST_SSE_REG;
8000 else
8002 rtx t1, t2;
8004 /* Unnamed floating parameters are passed in both the
8005 SSE and integer registers. */
8006 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8007 t2 = gen_rtx_REG (mode, regno);
8008 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8009 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8010 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8013 /* Handle aggregate types passed in registers. */
8014 if (orig_mode == BLKmode)
8016 if (bytes > 0 && bytes <= 8)
8017 mode = (bytes > 4 ? DImode : SImode);
8018 if (mode == BLKmode)
8019 mode = DImode;
8022 return gen_reg_or_parallel (mode, orig_mode, regno);
8025 /* Return where to put the arguments to a function.
8026 Return zero to push the argument on the stack,
8027 or a hard register in which to store the argument.
8028 MODE is the argument's machine mode. TYPE is the data type of the
8029 argument. It is null for libcalls where that information may not be
8030 available. CUM gives information about the preceding args and about
8031 the function being called. NAMED is nonzero if this argument is a
8032 named parameter (otherwise it is an extra parameter matching an
8033 ellipsis). */
8035 static rtx
8036 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8037 const_tree type, bool named)
8039 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8040 machine_mode mode = omode;
8041 HOST_WIDE_INT bytes, words;
8042 rtx arg;
8044 /* All pointer bounds arguments are handled separately here. */
8045 if ((type && POINTER_BOUNDS_TYPE_P (type))
8046 || POINTER_BOUNDS_MODE_P (mode))
8048 /* Return NULL if bounds are forced to go in Bounds Table. */
8049 if (cum->bnds_in_bt)
8050 arg = NULL;
8051 /* Return the next available bound reg if any. */
8052 else if (cum->bnd_regno <= LAST_BND_REG)
8053 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8054 /* Return the next special slot number otherwise. */
8055 else
8056 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8058 return arg;
8061 if (mode == BLKmode)
8062 bytes = int_size_in_bytes (type);
8063 else
8064 bytes = GET_MODE_SIZE (mode);
8065 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8067 /* To simplify the code below, represent vector types with a vector mode
8068 even if MMX/SSE are not active. */
8069 if (type && TREE_CODE (type) == VECTOR_TYPE)
8070 mode = type_natural_mode (type, cum, false);
8072 if (TARGET_64BIT)
8074 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8076 if (call_abi == MS_ABI)
8077 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8078 else
8079 arg = function_arg_64 (cum, mode, omode, type, named);
8081 else
8082 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8084 return arg;
8087 /* A C expression that indicates when an argument must be passed by
8088 reference. If nonzero for an argument, a copy of that argument is
8089 made in memory and a pointer to the argument is passed instead of
8090 the argument itself. The pointer is passed in whatever way is
8091 appropriate for passing a pointer to that type. */
8093 static bool
8094 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8095 const_tree type, bool)
8097 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8099 /* Bounds are never passed by reference. */
8100 if ((type && POINTER_BOUNDS_TYPE_P (type))
8101 || POINTER_BOUNDS_MODE_P (mode))
8102 return false;
8104 if (TARGET_64BIT)
8106 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8108 /* See Windows x64 Software Convention. */
8109 if (call_abi == MS_ABI)
8111 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8113 if (type)
8115 /* Arrays are passed by reference. */
8116 if (TREE_CODE (type) == ARRAY_TYPE)
8117 return true;
8119 if (RECORD_OR_UNION_TYPE_P (type))
8121 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8122 are passed by reference. */
8123 msize = int_size_in_bytes (type);
8127 /* __m128 is passed by reference. */
8128 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8130 else if (type && int_size_in_bytes (type) == -1)
8131 return true;
8134 return false;
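/* For illustration: under the MS ABI checks above,

     struct s3 { char c[3]; };   3 bytes, not 1/2/4/8 -> by reference
     __m128 v;                   16 bytes             -> by reference

   while an 8-byte struct is passed by value in a register. */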
8137 /* Return true when TYPE should be 128bit aligned for 32bit argument
8138 passing ABI. XXX: This function is obsolete and is only used for
8139 checking psABI compatibility with previous versions of GCC. */
8141 static bool
8142 ix86_compat_aligned_value_p (const_tree type)
8144 machine_mode mode = TYPE_MODE (type);
8145 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8146 || mode == TDmode
8147 || mode == TFmode
8148 || mode == TCmode)
8149 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8150 return true;
8151 if (TYPE_ALIGN (type) < 128)
8152 return false;
8154 if (AGGREGATE_TYPE_P (type))
8156 /* Walk the aggregates recursively. */
8157 switch (TREE_CODE (type))
8159 case RECORD_TYPE:
8160 case UNION_TYPE:
8161 case QUAL_UNION_TYPE:
8163 tree field;
8165 /* Walk all the structure fields. */
8166 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8168 if (TREE_CODE (field) == FIELD_DECL
8169 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8170 return true;
8172 break;
8175 case ARRAY_TYPE:
8176 /* Just in case some language passes arrays by value. */
8177 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8178 return true;
8179 break;
8181 default:
8182 gcc_unreachable ();
8185 return false;
8188 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8189 XXX: This function is obsolete and is only used for checking psABI
8190 compatibility with previous versions of GCC. */
8192 static unsigned int
8193 ix86_compat_function_arg_boundary (machine_mode mode,
8194 const_tree type, unsigned int align)
8196 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8197 natural boundaries. */
8198 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8200 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8201 make an exception for SSE modes since these require 128bit
8202 alignment.
8204 The handling here differs from field_alignment. ICC aligns MMX
8205 arguments to 4 byte boundaries, while structure fields are aligned
8206 to 8 byte boundaries. */
8207 if (!type)
8209 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8210 align = PARM_BOUNDARY;
8212 else
8214 if (!ix86_compat_aligned_value_p (type))
8215 align = PARM_BOUNDARY;
8218 if (align > BIGGEST_ALIGNMENT)
8219 align = BIGGEST_ALIGNMENT;
8220 return align;
8223 /* Return true when TYPE should be 128bit aligned for 32bit argument
8224 passing ABI. */
8226 static bool
8227 ix86_contains_aligned_value_p (const_tree type)
8229 machine_mode mode = TYPE_MODE (type);
8231 if (mode == XFmode || mode == XCmode)
8232 return false;
8234 if (TYPE_ALIGN (type) < 128)
8235 return false;
8237 if (AGGREGATE_TYPE_P (type))
8239 /* Walk the aggregates recursively. */
8240 switch (TREE_CODE (type))
8242 case RECORD_TYPE:
8243 case UNION_TYPE:
8244 case QUAL_UNION_TYPE:
8246 tree field;
8248 /* Walk all the structure fields. */
8249 for (field = TYPE_FIELDS (type);
8250 field;
8251 field = DECL_CHAIN (field))
8253 if (TREE_CODE (field) == FIELD_DECL
8254 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8255 return true;
8257 break;
8260 case ARRAY_TYPE:
8261 /* Just in case some language passes arrays by value. */
8262 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8263 return true;
8264 break;
8266 default:
8267 gcc_unreachable ();
8270 else
8271 return TYPE_ALIGN (type) >= 128;
8273 return false;
8276 /* Gives the alignment boundary, in bits, of an argument with the
8277 specified mode and type. */
8279 static unsigned int
8280 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8282 unsigned int align;
8283 if (type)
8285 /* Since the main variant type is used for the call, convert the
8286 type to its main variant. */
8287 type = TYPE_MAIN_VARIANT (type);
8288 align = TYPE_ALIGN (type);
8290 else
8291 align = GET_MODE_ALIGNMENT (mode);
8292 if (align < PARM_BOUNDARY)
8293 align = PARM_BOUNDARY;
8294 else
8296 static bool warned;
8297 unsigned int saved_align = align;
8299 if (!TARGET_64BIT)
8301 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8302 if (!type)
8304 if (mode == XFmode || mode == XCmode)
8305 align = PARM_BOUNDARY;
8307 else if (!ix86_contains_aligned_value_p (type))
8308 align = PARM_BOUNDARY;
8310 if (align < 128)
8311 align = PARM_BOUNDARY;
8314 if (warn_psabi
8315 && !warned
8316 && align != ix86_compat_function_arg_boundary (mode, type,
8317 saved_align))
8319 warned = true;
8320 inform (input_location,
8321 "The ABI for passing parameters with %d-byte"
8322 " alignment has changed in GCC 4.6",
8323 align / BITS_PER_UNIT);
8327 return align;
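/* For illustration: in 32-bit mode an __m128 argument reports a
   128-bit boundary here, while a plain double falls back to
   PARM_BOUNDARY (32 bits), matching the i386 psABI stack layout. */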
8330 /* Return true if N is a possible register number of function value. */
8332 static bool
8333 ix86_function_value_regno_p (const unsigned int regno)
8335 switch (regno)
8337 case AX_REG:
8338 return true;
8339 case DX_REG:
8340 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8341 case DI_REG:
8342 case SI_REG:
8343 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8345 case BND0_REG:
8346 case BND1_REG:
8347 return chkp_function_instrumented_p (current_function_decl);
8349 /* Complex values are returned in the %st(0)/%st(1) pair. */
8350 case ST0_REG:
8351 case ST1_REG:
8352 /* TODO: The function should depend on current function ABI but
8353 builtins.c would need updating then. Therefore we use the
8354 default ABI. */
8355 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8356 return false;
8357 return TARGET_FLOAT_RETURNS_IN_80387;
8359 /* Complex values are returned in the %xmm0/%xmm1 pair. */
8360 case XMM0_REG:
8361 case XMM1_REG:
8362 return TARGET_SSE;
8364 case MM0_REG:
8365 if (TARGET_MACHO || TARGET_64BIT)
8366 return false;
8367 return TARGET_MMX;
8370 return false;
8373 /* Define how to find the value returned by a function.
8374 VALTYPE is the data type of the value (as a tree).
8375 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8376 otherwise, FUNC is 0. */
8378 static rtx
8379 function_value_32 (machine_mode orig_mode, machine_mode mode,
8380 const_tree fntype, const_tree fn)
8382 unsigned int regno;
8384 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8385 we normally prevent this case when mmx is not available. However,
8386 some ABIs may require the result to be returned like DImode. */
8387 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8388 regno = FIRST_MMX_REG;
8390 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8391 we prevent this case when sse is not available. However, some ABIs
8392 may require the result to be returned like integer TImode. */
8393 else if (mode == TImode
8394 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8395 regno = FIRST_SSE_REG;
8397 /* 32-byte vector modes in %ymm0. */
8398 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8399 regno = FIRST_SSE_REG;
8401 /* 64-byte vector modes in %zmm0. */
8402 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8403 regno = FIRST_SSE_REG;
8405 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8406 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8407 regno = FIRST_FLOAT_REG;
8408 else
8409 /* Most things go in %eax. */
8410 regno = AX_REG;
8412 /* Override FP return register with %xmm0 for local functions when
8413 SSE math is enabled or for functions with sseregparm attribute. */
8414 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8416 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8417 if (sse_level == -1)
8419 error ("calling %qD with SSE caling convention without "
8420 "SSE/SSE2 enabled", fn);
8421 sorry ("this is a GCC bug that can be worked around by adding "
8422 "attribute used to function called");
8424 else if ((sse_level >= 1 && mode == SFmode)
8425 || (sse_level == 2 && mode == DFmode))
8426 regno = FIRST_SSE_REG;
8429 /* OImode shouldn't be used directly. */
8430 gcc_assert (mode != OImode);
8432 return gen_rtx_REG (orig_mode, regno);
8435 static rtx
8436 function_value_64 (machine_mode orig_mode, machine_mode mode,
8437 const_tree valtype)
8439 rtx ret;
8441 /* Handle libcalls, which don't provide a type node. */
8442 if (valtype == NULL)
8444 unsigned int regno;
8446 switch (mode)
8448 case SFmode:
8449 case SCmode:
8450 case DFmode:
8451 case DCmode:
8452 case TFmode:
8453 case SDmode:
8454 case DDmode:
8455 case TDmode:
8456 regno = FIRST_SSE_REG;
8457 break;
8458 case XFmode:
8459 case XCmode:
8460 regno = FIRST_FLOAT_REG;
8461 break;
8462 case TCmode:
8463 return NULL;
8464 default:
8465 regno = AX_REG;
8468 return gen_rtx_REG (mode, regno);
8470 else if (POINTER_TYPE_P (valtype))
8472 /* Pointers are always returned in word_mode. */
8473 mode = word_mode;
8476 ret = construct_container (mode, orig_mode, valtype, 1,
8477 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8478 x86_64_int_return_registers, 0);
8480 /* For zero-sized structures, construct_container returns NULL, but we
8481 need to keep the rest of the compiler happy by returning a meaningful value. */
8482 if (!ret)
8483 ret = gen_rtx_REG (orig_mode, AX_REG);
8485 return ret;
8488 static rtx
8489 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8490 const_tree valtype)
8492 unsigned int regno = AX_REG;
8494 if (TARGET_SSE)
8496 switch (GET_MODE_SIZE (mode))
8498 case 16:
8499 if (valtype != NULL_TREE
8500 && !VECTOR_INTEGER_TYPE_P (valtype)
8502 && !INTEGRAL_TYPE_P (valtype)
8503 && !VECTOR_FLOAT_TYPE_P (valtype))
8504 break;
8505 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8506 && !COMPLEX_MODE_P (mode))
8507 regno = FIRST_SSE_REG;
8508 break;
8509 case 8:
8510 case 4:
8511 if (mode == SFmode || mode == DFmode)
8512 regno = FIRST_SSE_REG;
8513 break;
8514 default:
8515 break;
8518 return gen_rtx_REG (orig_mode, regno);
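/* For illustration (a reading of the checks above, not an ABI reference):
   under the MS x64 convention, SFmode/DFmode values and 16-byte vector or
   integer values come back in %xmm0, and everything else that fits in a
   register comes back in %rax.  */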
8521 static rtx
8522 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8523 machine_mode orig_mode, machine_mode mode)
8525 const_tree fn, fntype;
8527 fn = NULL_TREE;
8528 if (fntype_or_decl && DECL_P (fntype_or_decl))
8529 fn = fntype_or_decl;
8530 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8532 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8533 || POINTER_BOUNDS_MODE_P (mode))
8534 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8535 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8536 return function_value_ms_64 (orig_mode, mode, valtype);
8537 else if (TARGET_64BIT)
8538 return function_value_64 (orig_mode, mode, valtype);
8539 else
8540 return function_value_32 (orig_mode, mode, fntype, fn);
8543 static rtx
8544 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8546 machine_mode mode, orig_mode;
8548 orig_mode = TYPE_MODE (valtype);
8549 mode = type_natural_mode (valtype, NULL, true);
8550 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8553 /* Return an RTX representing a place where a function returns
8554 or receives pointer bounds, or NULL if no bounds are returned.
8556 VALTYPE is a data type of a value returned by the function.
8558 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8559 or FUNCTION_TYPE of the function.
8561 If OUTGOING is false, return a place in which the caller will
8562 see the return value. Otherwise, return a place where a
8563 function returns a value. */
8565 static rtx
8566 ix86_function_value_bounds (const_tree valtype,
8567 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8568 bool outgoing ATTRIBUTE_UNUSED)
8570 rtx res = NULL_RTX;
8572 if (BOUNDED_TYPE_P (valtype))
8573 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8574 else if (chkp_type_has_pointer (valtype))
8576 bitmap slots;
8577 rtx bounds[2];
8578 bitmap_iterator bi;
8579 unsigned i, bnd_no = 0;
8581 bitmap_obstack_initialize (NULL);
8582 slots = BITMAP_ALLOC (NULL);
8583 chkp_find_bound_slots (valtype, slots);
8585 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8587 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8588 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8589 gcc_assert (bnd_no < 2);
8590 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8593 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8595 BITMAP_FREE (slots);
8596 bitmap_obstack_release (NULL);
8598 else
8599 res = NULL_RTX;
8601 return res;
8604 /* Pointer function arguments and return values are promoted to
8605 word_mode. */
8607 static machine_mode
8608 ix86_promote_function_mode (const_tree type, machine_mode mode,
8609 int *punsignedp, const_tree fntype,
8610 int for_return)
8612 if (type != NULL_TREE && POINTER_TYPE_P (type))
8614 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8615 return word_mode;
8617 return default_promote_function_mode (type, mode, punsignedp, fntype,
8618 for_return);
8621 /* Return true if a structure, union or array with MODE containing FIELD
8622 should be accessed using BLKmode. */
8624 static bool
8625 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8627 /* Union with XFmode must be in BLKmode. */
8628 return (mode == XFmode
8629 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8630 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8633 static rtx
8634 ix86_libcall_value (machine_mode mode)
8636 return ix86_function_value_1 (NULL, NULL, mode, mode);
8639 /* Return true iff type is returned in memory. */
8641 static bool
8642 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8644 #ifdef SUBTARGET_RETURN_IN_MEMORY
8645 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8646 #else
8647 const machine_mode mode = type_natural_mode (type, NULL, true);
8648 HOST_WIDE_INT size;
8650 if (POINTER_BOUNDS_TYPE_P (type))
8651 return false;
8653 if (TARGET_64BIT)
8655 if (ix86_function_type_abi (fntype) == MS_ABI)
8657 size = int_size_in_bytes (type);
8659 /* __m128 is returned in xmm0. */
8660 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8661 || INTEGRAL_TYPE_P (type)
8662 || VECTOR_FLOAT_TYPE_P (type))
8663 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8664 && !COMPLEX_MODE_P (mode)
8665 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8666 return false;
8668 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8669 return size != 1 && size != 2 && size != 4 && size != 8;
8671 else
8673 int needed_intregs, needed_sseregs;
8675 return examine_argument (mode, type, 1,
8676 &needed_intregs, &needed_sseregs);
8679 else
8681 size = int_size_in_bytes (type);
8683 /* Intel MCU psABI returns scalars and aggregates no larger than 8
8684 bytes in registers. */
8685 if (TARGET_IAMCU)
8686 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
8688 if (mode == BLKmode)
8689 return true;
8691 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8692 return false;
8694 if (VECTOR_MODE_P (mode) || mode == TImode)
8696 /* User-created vectors small enough to fit in EAX. */
8697 if (size < 8)
8698 return false;
8700 /* Unless the ABI prescribes otherwise,
8701 MMX/3dNow values are returned in MM0 if available. */
8703 if (size == 8)
8704 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8706 /* SSE values are returned in XMM0 if available. */
8707 if (size == 16)
8708 return !TARGET_SSE;
8710 /* AVX values are returned in YMM0 if available. */
8711 if (size == 32)
8712 return !TARGET_AVX;
8714 /* AVX512F values are returned in ZMM0 if available. */
8715 if (size == 64)
8716 return !TARGET_AVX512F;
8719 if (mode == XFmode)
8720 return false;
8722 if (size > 12)
8723 return true;
8725 /* OImode shouldn't be used directly. */
8726 gcc_assert (mode != OImode);
8728 return false;
8730 #endif
8734 /* Create the va_list data type. */
8736 static tree
8737 ix86_build_builtin_va_list_64 (void)
8739 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8741 record = lang_hooks.types.make_type (RECORD_TYPE);
8742 type_decl = build_decl (BUILTINS_LOCATION,
8743 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8745 f_gpr = build_decl (BUILTINS_LOCATION,
8746 FIELD_DECL, get_identifier ("gp_offset"),
8747 unsigned_type_node);
8748 f_fpr = build_decl (BUILTINS_LOCATION,
8749 FIELD_DECL, get_identifier ("fp_offset"),
8750 unsigned_type_node);
8751 f_ovf = build_decl (BUILTINS_LOCATION,
8752 FIELD_DECL, get_identifier ("overflow_arg_area"),
8753 ptr_type_node);
8754 f_sav = build_decl (BUILTINS_LOCATION,
8755 FIELD_DECL, get_identifier ("reg_save_area"),
8756 ptr_type_node);
8758 va_list_gpr_counter_field = f_gpr;
8759 va_list_fpr_counter_field = f_fpr;
8761 DECL_FIELD_CONTEXT (f_gpr) = record;
8762 DECL_FIELD_CONTEXT (f_fpr) = record;
8763 DECL_FIELD_CONTEXT (f_ovf) = record;
8764 DECL_FIELD_CONTEXT (f_sav) = record;
8766 TYPE_STUB_DECL (record) = type_decl;
8767 TYPE_NAME (record) = type_decl;
8768 TYPE_FIELDS (record) = f_gpr;
8769 DECL_CHAIN (f_gpr) = f_fpr;
8770 DECL_CHAIN (f_fpr) = f_ovf;
8771 DECL_CHAIN (f_ovf) = f_sav;
8773 layout_type (record);
8775 /* The correct type is an array type of one element. */
8776 return build_array_type (record, build_index_type (size_zero_node));
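/* For reference, the record built above corresponds roughly to the
   SysV x86-64 va_list:

	typedef struct __va_list_tag {
	  unsigned int gp_offset;
	  unsigned int fp_offset;
	  void *overflow_arg_area;
	  void *reg_save_area;
	} va_list[1];  */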
8779 /* Set up the builtin va_list data type and, for 64-bit, the additional
8780 calling-convention-specific va_list data types. */
8782 static tree
8783 ix86_build_builtin_va_list (void)
8785 if (TARGET_64BIT)
8787 /* Initialize ABI specific va_list builtin types. */
8788 tree sysv_va_list, ms_va_list;
8790 sysv_va_list = ix86_build_builtin_va_list_64 ();
8791 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
8793 /* For MS_ABI we use a plain pointer to the argument area. */
8794 ms_va_list = build_pointer_type (char_type_node);
8795 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
8797 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
8799 else
8801 /* For i386 we use a plain pointer to the argument area. */
8802 return build_pointer_type (char_type_node);
8806 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8808 static void
8809 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8811 rtx save_area, mem;
8812 alias_set_type set;
8813 int i, max;
8815 /* GPR size of varargs save area. */
8816 if (cfun->va_list_gpr_size)
8817 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8818 else
8819 ix86_varargs_gpr_size = 0;
8821 /* FPR size of varargs save area. We don't need it if we don't pass
8822 anything in SSE registers. */
8823 if (TARGET_SSE && cfun->va_list_fpr_size)
8824 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8825 else
8826 ix86_varargs_fpr_size = 0;
8828 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8829 return;
8831 save_area = frame_pointer_rtx;
8832 set = get_varargs_alias_set ();
8834 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8835 if (max > X86_64_REGPARM_MAX)
8836 max = X86_64_REGPARM_MAX;
8838 for (i = cum->regno; i < max; i++)
8840 mem = gen_rtx_MEM (word_mode,
8841 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8842 MEM_NOTRAP_P (mem) = 1;
8843 set_mem_alias_set (mem, set);
8844 emit_move_insn (mem,
8845 gen_rtx_REG (word_mode,
8846 x86_64_int_parameter_registers[i]));
8849 if (ix86_varargs_fpr_size)
8851 machine_mode smode;
8852 rtx_code_label *label;
8853 rtx test;
8855 /* Now emit code to save SSE registers. The AX parameter contains the
8856 number of SSE parameter registers used to call this function, though all we
8857 actually check here is the zero/non-zero status. */
8859 label = gen_label_rtx ();
8860 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8861 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8862 label));
8864 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8865 we used movdqa (i.e. TImode) instead? Perhaps even better would
8866 be if we could determine the real mode of the data, via a hook
8867 into pass_stdarg. Ignore all that for now. */
8868 smode = V4SFmode;
8869 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8870 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8872 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8873 if (max > X86_64_SSE_REGPARM_MAX)
8874 max = X86_64_SSE_REGPARM_MAX;
8876 for (i = cum->sse_regno; i < max; ++i)
8878 mem = plus_constant (Pmode, save_area,
8879 i * 16 + ix86_varargs_gpr_size);
8880 mem = gen_rtx_MEM (smode, mem);
8881 MEM_NOTRAP_P (mem) = 1;
8882 set_mem_alias_set (mem, set);
8883 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8885 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8888 emit_label (label);
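/* A sketch of the save area laid out above: GP register I is stored at
   offset I * UNITS_PER_WORD, and SSE register I at
   ix86_varargs_gpr_size + I * 16.  The SSE block is skipped at run time
   when %al is zero, i.e. when the caller passed nothing in SSE
   registers.  */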
8892 static void
8893 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8895 alias_set_type set = get_varargs_alias_set ();
8896 int i;
8898 /* Reset to zero, as there might be a SysV va_arg used
8899 before. */
8900 ix86_varargs_gpr_size = 0;
8901 ix86_varargs_fpr_size = 0;
8903 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8905 rtx reg, mem;
8907 mem = gen_rtx_MEM (Pmode,
8908 plus_constant (Pmode, virtual_incoming_args_rtx,
8909 i * UNITS_PER_WORD));
8910 MEM_NOTRAP_P (mem) = 1;
8911 set_mem_alias_set (mem, set);
8913 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8914 emit_move_insn (mem, reg);
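/* Unlike the SysV case there is no separate save area here: the MS ABI's
   caller-allocated home area for the register parameters sits just above
   the return address, so the loop above simply spills the remaining
   parameter registers back into their home slots.  */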
8918 static void
8919 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8920 tree type, int *, int no_rtl)
8922 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8923 CUMULATIVE_ARGS next_cum;
8924 tree fntype;
8926 /* This argument doesn't appear to be used anymore, which is good,
8927 because the old code here didn't suppress rtl generation. */
8928 gcc_assert (!no_rtl);
8930 if (!TARGET_64BIT)
8931 return;
8933 fntype = TREE_TYPE (current_function_decl);
8935 /* For varargs, we do not want to skip the dummy va_dcl argument.
8936 For stdargs, we do want to skip the last named argument. */
8937 next_cum = *cum;
8938 if (stdarg_p (fntype))
8939 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8940 true);
8942 if (cum->call_abi == MS_ABI)
8943 setup_incoming_varargs_ms_64 (&next_cum);
8944 else
8945 setup_incoming_varargs_64 (&next_cum);
8948 static void
8949 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8950 enum machine_mode mode,
8951 tree type,
8952 int *pretend_size ATTRIBUTE_UNUSED,
8953 int no_rtl)
8955 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8956 CUMULATIVE_ARGS next_cum;
8957 tree fntype;
8958 rtx save_area;
8959 int bnd_reg, i, max;
8961 gcc_assert (!no_rtl);
8963 /* Do nothing if we use a plain pointer to the argument area. */
8964 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8965 return;
8967 fntype = TREE_TYPE (current_function_decl);
8969 /* For varargs, we do not want to skip the dummy va_dcl argument.
8970 For stdargs, we do want to skip the last named argument. */
8971 next_cum = *cum;
8972 if (stdarg_p (fntype))
8973 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8974 true);
8975 save_area = frame_pointer_rtx;
8977 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8978 if (max > X86_64_REGPARM_MAX)
8979 max = X86_64_REGPARM_MAX;
8981 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8982 if (chkp_function_instrumented_p (current_function_decl))
8983 for (i = cum->regno; i < max; i++)
8985 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8986 rtx ptr = gen_rtx_REG (Pmode,
8987 x86_64_int_parameter_registers[i]);
8988 rtx bounds;
8990 if (bnd_reg <= LAST_BND_REG)
8991 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8992 else
8994 rtx ldx_addr =
8995 plus_constant (Pmode, arg_pointer_rtx,
8996 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8997 bounds = gen_reg_rtx (BNDmode);
8998 emit_insn (BNDmode == BND64mode
8999 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9000 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9003 emit_insn (BNDmode == BND64mode
9004 ? gen_bnd64_stx (addr, ptr, bounds)
9005 : gen_bnd32_stx (addr, ptr, bounds));
9007 bnd_reg++;
9012 /* Return true if TYPE is a va_list kind that is a plain char pointer. */
9014 static bool
9015 is_va_list_char_pointer (tree type)
9017 tree canonic;
9019 /* For 32-bit it is always true. */
9020 if (!TARGET_64BIT)
9021 return true;
9022 canonic = ix86_canonical_va_list_type (type);
9023 return (canonic == ms_va_list_type_node
9024 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9027 /* Implement va_start. */
9029 static void
9030 ix86_va_start (tree valist, rtx nextarg)
9032 HOST_WIDE_INT words, n_gpr, n_fpr;
9033 tree f_gpr, f_fpr, f_ovf, f_sav;
9034 tree gpr, fpr, ovf, sav, t;
9035 tree type;
9036 rtx ovf_rtx;
9038 if (flag_split_stack
9039 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9041 unsigned int scratch_regno;
9043 /* When we are splitting the stack, we can't refer to the stack
9044 arguments using internal_arg_pointer, because they may be on
9045 the old stack. The split stack prologue will arrange to
9046 leave a pointer to the old stack arguments in a scratch
9047 register, which we here copy to a pseudo-register. The split
9048 stack prologue can't set the pseudo-register directly because
9049 it (the prologue) runs before any registers have been saved. */
9051 scratch_regno = split_stack_prologue_scratch_regno ();
9052 if (scratch_regno != INVALID_REGNUM)
9054 rtx reg;
9055 rtx_insn *seq;
9057 reg = gen_reg_rtx (Pmode);
9058 cfun->machine->split_stack_varargs_pointer = reg;
9060 start_sequence ();
9061 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9062 seq = get_insns ();
9063 end_sequence ();
9065 push_topmost_sequence ();
9066 emit_insn_after (seq, entry_of_function ());
9067 pop_topmost_sequence ();
9071 /* Only the 64-bit target needs something special. */
9072 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9074 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9075 std_expand_builtin_va_start (valist, nextarg);
9076 else
9078 rtx va_r, next;
9080 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9081 next = expand_binop (ptr_mode, add_optab,
9082 cfun->machine->split_stack_varargs_pointer,
9083 crtl->args.arg_offset_rtx,
9084 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9085 convert_move (va_r, next, 0);
9087 /* Store zero bounds for va_list. */
9088 if (chkp_function_instrumented_p (current_function_decl))
9089 chkp_expand_bounds_reset_for_mem (valist,
9090 make_tree (TREE_TYPE (valist),
9091 next));
9094 return;
9097 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9098 f_fpr = DECL_CHAIN (f_gpr);
9099 f_ovf = DECL_CHAIN (f_fpr);
9100 f_sav = DECL_CHAIN (f_ovf);
9102 valist = build_simple_mem_ref (valist);
9103 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9104 /* The following should be folded into the MEM_REF offset. */
9105 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9106 f_gpr, NULL_TREE);
9107 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9108 f_fpr, NULL_TREE);
9109 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9110 f_ovf, NULL_TREE);
9111 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9112 f_sav, NULL_TREE);
9114 /* Count number of gp and fp argument registers used. */
9115 words = crtl->args.info.words;
9116 n_gpr = crtl->args.info.regno;
9117 n_fpr = crtl->args.info.sse_regno;
9119 if (cfun->va_list_gpr_size)
9121 type = TREE_TYPE (gpr);
9122 t = build2 (MODIFY_EXPR, type,
9123 gpr, build_int_cst (type, n_gpr * 8));
9124 TREE_SIDE_EFFECTS (t) = 1;
9125 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9128 if (TARGET_SSE && cfun->va_list_fpr_size)
9130 type = TREE_TYPE (fpr);
9131 t = build2 (MODIFY_EXPR, type, fpr,
9132 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9133 TREE_SIDE_EFFECTS (t) = 1;
9134 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9137 /* Find the overflow area. */
9138 type = TREE_TYPE (ovf);
9139 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9140 ovf_rtx = crtl->args.internal_arg_pointer;
9141 else
9142 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9143 t = make_tree (type, ovf_rtx);
9144 if (words != 0)
9145 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9147 /* Store zero bounds for overflow area pointer. */
9148 if (chkp_function_instrumented_p (current_function_decl))
9149 chkp_expand_bounds_reset_for_mem (ovf, t);
9151 t = build2 (MODIFY_EXPR, type, ovf, t);
9152 TREE_SIDE_EFFECTS (t) = 1;
9153 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9155 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9157 /* Find the register save area.
9158 The function prologue saves it right above the stack frame. */
9159 type = TREE_TYPE (sav);
9160 t = make_tree (type, frame_pointer_rtx);
9161 if (!ix86_varargs_gpr_size)
9162 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9164 /* Store zero bounds for save area pointer. */
9165 if (chkp_function_instrumented_p (current_function_decl))
9166 chkp_expand_bounds_reset_for_mem (sav, t);
9168 t = build2 (MODIFY_EXPR, type, sav, t);
9169 TREE_SIDE_EFFECTS (t) = 1;
9170 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
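/* Roughly, for the SysV 64-bit case the expansion above amounts to
   (with N_GPR/N_FPR the number of named register arguments):

	va->gp_offset = N_GPR * 8;
	va->fp_offset = X86_64_REGPARM_MAX * 8 + N_FPR * 16;
	va->overflow_arg_area = first stack argument;
	va->reg_save_area = save area set up in the prologue;  */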
9174 /* Implement va_arg. */
9176 static tree
9177 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9178 gimple_seq *post_p)
9180 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9181 tree f_gpr, f_fpr, f_ovf, f_sav;
9182 tree gpr, fpr, ovf, sav, t;
9183 int size, rsize;
9184 tree lab_false, lab_over = NULL_TREE;
9185 tree addr, t2;
9186 rtx container;
9187 int indirect_p = 0;
9188 tree ptrtype;
9189 machine_mode nat_mode;
9190 unsigned int arg_boundary;
9192 /* Only the 64-bit target needs something special. */
9193 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9194 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9196 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9197 f_fpr = DECL_CHAIN (f_gpr);
9198 f_ovf = DECL_CHAIN (f_fpr);
9199 f_sav = DECL_CHAIN (f_ovf);
9201 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9202 valist, f_gpr, NULL_TREE);
9204 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9205 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9206 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9208 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9209 if (indirect_p)
9210 type = build_pointer_type (type);
9211 size = int_size_in_bytes (type);
9212 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9214 nat_mode = type_natural_mode (type, NULL, false);
9215 switch (nat_mode)
9217 case V8SFmode:
9218 case V8SImode:
9219 case V32QImode:
9220 case V16HImode:
9221 case V4DFmode:
9222 case V4DImode:
9223 case V16SFmode:
9224 case V16SImode:
9225 case V64QImode:
9226 case V32HImode:
9227 case V8DFmode:
9228 case V8DImode:
9229 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
9230 if (!TARGET_64BIT_MS_ABI)
9232 container = NULL;
9233 break;
9236 default:
9237 container = construct_container (nat_mode, TYPE_MODE (type),
9238 type, 0, X86_64_REGPARM_MAX,
9239 X86_64_SSE_REGPARM_MAX, intreg,
9240 0);
9241 break;
9244 /* Pull the value out of the saved registers. */
9246 addr = create_tmp_var (ptr_type_node, "addr");
9248 if (container)
9250 int needed_intregs, needed_sseregs;
9251 bool need_temp;
9252 tree int_addr, sse_addr;
9254 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9255 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9257 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9259 need_temp = (!REG_P (container)
9260 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9261 || TYPE_ALIGN (type) > 128));
9263 /* In case we are passing a structure, verify that it occupies consecutive
9264 registers in the register save area. If not, we need to do moves. */
9265 if (!need_temp && !REG_P (container))
9267 /* Verify that all registers are strictly consecutive. */
9268 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9270 int i;
9272 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9274 rtx slot = XVECEXP (container, 0, i);
9275 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9276 || INTVAL (XEXP (slot, 1)) != i * 16)
9277 need_temp = true;
9280 else
9282 int i;
9284 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9286 rtx slot = XVECEXP (container, 0, i);
9287 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9288 || INTVAL (XEXP (slot, 1)) != i * 8)
9289 need_temp = true;
9293 if (!need_temp)
9295 int_addr = addr;
9296 sse_addr = addr;
9298 else
9300 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9301 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9304 /* First ensure that we fit completely in registers. */
9305 if (needed_intregs)
9307 t = build_int_cst (TREE_TYPE (gpr),
9308 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9309 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9310 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9311 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9312 gimplify_and_add (t, pre_p);
9314 if (needed_sseregs)
9316 t = build_int_cst (TREE_TYPE (fpr),
9317 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9318 + X86_64_REGPARM_MAX * 8);
9319 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9320 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9321 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9322 gimplify_and_add (t, pre_p);
9325 /* Compute index to start of area used for integer regs. */
9326 if (needed_intregs)
9328 /* int_addr = gpr + sav; */
9329 t = fold_build_pointer_plus (sav, gpr);
9330 gimplify_assign (int_addr, t, pre_p);
9332 if (needed_sseregs)
9334 /* sse_addr = fpr + sav; */
9335 t = fold_build_pointer_plus (sav, fpr);
9336 gimplify_assign (sse_addr, t, pre_p);
9338 if (need_temp)
9340 int i, prev_size = 0;
9341 tree temp = create_tmp_var (type, "va_arg_tmp");
9343 /* addr = &temp; */
9344 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9345 gimplify_assign (addr, t, pre_p);
9347 for (i = 0; i < XVECLEN (container, 0); i++)
9349 rtx slot = XVECEXP (container, 0, i);
9350 rtx reg = XEXP (slot, 0);
9351 machine_mode mode = GET_MODE (reg);
9352 tree piece_type;
9353 tree addr_type;
9354 tree daddr_type;
9355 tree src_addr, src;
9356 int src_offset;
9357 tree dest_addr, dest;
9358 int cur_size = GET_MODE_SIZE (mode);
9360 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9361 prev_size = INTVAL (XEXP (slot, 1));
9362 if (prev_size + cur_size > size)
9364 cur_size = size - prev_size;
9365 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9366 if (mode == BLKmode)
9367 mode = QImode;
9369 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9370 if (mode == GET_MODE (reg))
9371 addr_type = build_pointer_type (piece_type);
9372 else
9373 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9374 true);
9375 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9376 true);
9378 if (SSE_REGNO_P (REGNO (reg)))
9380 src_addr = sse_addr;
9381 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9383 else
9385 src_addr = int_addr;
9386 src_offset = REGNO (reg) * 8;
9388 src_addr = fold_convert (addr_type, src_addr);
9389 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9391 dest_addr = fold_convert (daddr_type, addr);
9392 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9393 if (cur_size == GET_MODE_SIZE (mode))
9395 src = build_va_arg_indirect_ref (src_addr);
9396 dest = build_va_arg_indirect_ref (dest_addr);
9398 gimplify_assign (dest, src, pre_p);
9400 else
9402 tree copy
9403 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9404 3, dest_addr, src_addr,
9405 size_int (cur_size));
9406 gimplify_and_add (copy, pre_p);
9408 prev_size += cur_size;
9412 if (needed_intregs)
9414 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9415 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9416 gimplify_assign (gpr, t, pre_p);
9419 if (needed_sseregs)
9421 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9422 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9423 gimplify_assign (unshare_expr (fpr), t, pre_p);
9426 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9428 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9431 /* ... otherwise out of the overflow area. */
9433 /* When we align a parameter on the stack for the caller, if its
9434 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9435 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
9436 with the caller. */
9437 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9438 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9439 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9441 /* Care for on-stack alignment if needed. */
9442 if (arg_boundary <= 64 || size == 0)
9443 t = ovf;
9444 else
9446 HOST_WIDE_INT align = arg_boundary / 8;
9447 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9448 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9449 build_int_cst (TREE_TYPE (t), -align));
9452 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9453 gimplify_assign (addr, t, pre_p);
9455 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9456 gimplify_assign (unshare_expr (ovf), t, pre_p);
9458 if (container)
9459 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9461 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9462 addr = fold_convert (ptrtype, addr);
9464 if (indirect_p)
9465 addr = build_va_arg_indirect_ref (addr);
9466 return build_va_arg_indirect_ref (addr);
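/* For a single integer argument the sequence built above is roughly:

	if (va->gp_offset >= X86_64_REGPARM_MAX * 8)
	  goto stack;
	addr = va->reg_save_area + va->gp_offset;
	va->gp_offset += 8;
	goto done;
      stack:
	addr = va->overflow_arg_area;	(aligned first if required)
	va->overflow_arg_area += 8;
      done:
	result = *(TYPE *) addr;

   SSE arguments test fp_offset and use 16-byte save slots instead.  */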
9469 /* Return true if OPNUM's MEM should be matched
9470 in movabs* patterns. */
9472 bool
9473 ix86_check_movabs (rtx insn, int opnum)
9475 rtx set, mem;
9477 set = PATTERN (insn);
9478 if (GET_CODE (set) == PARALLEL)
9479 set = XVECEXP (set, 0, 0);
9480 gcc_assert (GET_CODE (set) == SET);
9481 mem = XEXP (set, opnum);
9482 while (SUBREG_P (mem))
9483 mem = SUBREG_REG (mem);
9484 gcc_assert (MEM_P (mem));
9485 return volatile_ok || !MEM_VOLATILE_P (mem);
9488 /* Initialize the table of extra 80387 mathematical constants. */
9490 static void
9491 init_ext_80387_constants (void)
9493 static const char * cst[5] =
9495 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9496 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9497 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9498 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9499 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9501 int i;
9503 for (i = 0; i < 5; i++)
9505 real_from_string (&ext_80387_constants_table[i], cst[i]);
9506 /* Ensure each constant is rounded to XFmode precision. */
9507 real_convert (&ext_80387_constants_table[i],
9508 XFmode, &ext_80387_constants_table[i]);
9511 ext_80387_constants_init = 1;
9514 /* Return non-zero if the constant is something that
9515 can be loaded with a special instruction. */
9517 int
9518 standard_80387_constant_p (rtx x)
9520 machine_mode mode = GET_MODE (x);
9522 REAL_VALUE_TYPE r;
9524 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9525 return -1;
9527 if (x == CONST0_RTX (mode))
9528 return 1;
9529 if (x == CONST1_RTX (mode))
9530 return 2;
9532 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9534 /* For XFmode constants, try to find a special 80387 instruction when
9535 optimizing for size or on those CPUs that benefit from them. */
9536 if (mode == XFmode
9537 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9539 int i;
9541 if (! ext_80387_constants_init)
9542 init_ext_80387_constants ();
9544 for (i = 0; i < 5; i++)
9545 if (real_identical (&r, &ext_80387_constants_table[i]))
9546 return i + 3;
9549 /* Load of the constant -0.0 or -1.0 will be split as
9550 fldz;fchs or fld1;fchs sequence. */
9551 if (real_isnegzero (&r))
9552 return 8;
9553 if (real_identical (&r, &dconstm1))
9554 return 9;
9556 return 0;
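/* Summary of the encoding above: -1 means not an x87 constant, 0 means
   no special instruction, 1 is fldz, 2 is fld1, 3..7 index the
   fldlg2/fldln2/fldl2e/fldl2t/fldpi table, and 8/9 mark -0.0 and -1.0,
   which are split into fldz/fld1 followed by fchs.  */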
9559 /* Return the opcode of the special instruction to be used to load
9560 the constant X. */
9562 const char *
9563 standard_80387_constant_opcode (rtx x)
9565 switch (standard_80387_constant_p (x))
9567 case 1:
9568 return "fldz";
9569 case 2:
9570 return "fld1";
9571 case 3:
9572 return "fldlg2";
9573 case 4:
9574 return "fldln2";
9575 case 5:
9576 return "fldl2e";
9577 case 6:
9578 return "fldl2t";
9579 case 7:
9580 return "fldpi";
9581 case 8:
9582 case 9:
9583 return "#";
9584 default:
9585 gcc_unreachable ();
9589 /* Return the CONST_DOUBLE representing the 80387 constant that is
9590 loaded by the specified special instruction. The argument IDX
9591 matches the return value from standard_80387_constant_p. */
9593 rtx
9594 standard_80387_constant_rtx (int idx)
9596 int i;
9598 if (! ext_80387_constants_init)
9599 init_ext_80387_constants ();
9601 switch (idx)
9603 case 3:
9604 case 4:
9605 case 5:
9606 case 6:
9607 case 7:
9608 i = idx - 3;
9609 break;
9611 default:
9612 gcc_unreachable ();
9615 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9616 XFmode);
9619 /* Return 1 if X is all 0s and 2 if X is all 1s
9620 in a supported SSE/AVX vector mode. */
9622 int
9623 standard_sse_constant_p (rtx x)
9625 machine_mode mode;
9627 if (!TARGET_SSE)
9628 return 0;
9630 mode = GET_MODE (x);
9632 if (x == const0_rtx || x == CONST0_RTX (mode))
9633 return 1;
9634 if (vector_all_ones_operand (x, mode))
9635 switch (mode)
9637 case V16QImode:
9638 case V8HImode:
9639 case V4SImode:
9640 case V2DImode:
9641 if (TARGET_SSE2)
9642 return 2;
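/* FALLTHRU */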
9643 case V32QImode:
9644 case V16HImode:
9645 case V8SImode:
9646 case V4DImode:
9647 if (TARGET_AVX2)
9648 return 2;
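/* FALLTHRU */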
9649 case V64QImode:
9650 case V32HImode:
9651 case V16SImode:
9652 case V8DImode:
9653 if (TARGET_AVX512F)
9654 return 2;
9655 default:
9656 break;
9659 return 0;
9662 /* Return the opcode of the special instruction to be used to load
9663 the constant X. */
9665 const char *
9666 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9668 switch (standard_sse_constant_p (x))
9670 case 1:
9671 switch (get_attr_mode (insn))
9673 case MODE_XI:
9674 return "vpxord\t%g0, %g0, %g0";
9675 case MODE_V16SF:
9676 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9677 : "vpxord\t%g0, %g0, %g0";
9678 case MODE_V8DF:
9679 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9680 : "vpxorq\t%g0, %g0, %g0";
9681 case MODE_TI:
9682 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9683 : "%vpxor\t%0, %d0";
9684 case MODE_V2DF:
9685 return "%vxorpd\t%0, %d0";
9686 case MODE_V4SF:
9687 return "%vxorps\t%0, %d0";
9689 case MODE_OI:
9690 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9691 : "vpxor\t%x0, %x0, %x0";
9692 case MODE_V4DF:
9693 return "vxorpd\t%x0, %x0, %x0";
9694 case MODE_V8SF:
9695 return "vxorps\t%x0, %x0, %x0";
9697 default:
9698 break;
9701 case 2:
9702 if (TARGET_AVX512VL
9703 || get_attr_mode (insn) == MODE_XI
9704 || get_attr_mode (insn) == MODE_V8DF
9705 || get_attr_mode (insn) == MODE_V16SF)
9706 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9707 if (TARGET_AVX)
9708 return "vpcmpeqd\t%0, %0, %0";
9709 else
9710 return "pcmpeqd\t%0, %0";
9712 default:
9713 break;
9715 gcc_unreachable ();
9718 /* Return true if OP contains a symbol reference. */
9720 bool
9721 symbolic_reference_mentioned_p (rtx op)
9723 const char *fmt;
9724 int i;
9726 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9727 return true;
9729 fmt = GET_RTX_FORMAT (GET_CODE (op));
9730 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9732 if (fmt[i] == 'E')
9734 int j;
9736 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9737 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9738 return true;
9741 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9742 return true;
9745 return false;
9748 /* Return true if it is appropriate to emit `ret' instructions in the
9749 body of a function. Do this only if the epilogue is simple, needing a
9750 couple of insns. Prior to reloading, we can't tell how many registers
9751 must be saved, so return false then. Return false if there is no frame
9752 marker to de-allocate. */
9754 bool
9755 ix86_can_use_return_insn_p (void)
9757 struct ix86_frame frame;
9759 if (! reload_completed || frame_pointer_needed)
9760 return 0;
9762 /* Don't allow more than 32k pop, since that's all we can do
9763 with one instruction. */
9764 if (crtl->args.pops_args && crtl->args.size >= 32768)
9765 return 0;
9767 ix86_compute_frame_layout (&frame);
9768 return (frame.stack_pointer_offset == UNITS_PER_WORD
9769 && (frame.nregs + frame.nsseregs) == 0);
9772 /* Value should be nonzero if functions must have frame pointers.
9773 Zero means the frame pointer need not be set up (and parms may
9774 be accessed via the stack pointer) in functions that seem suitable. */
9776 static bool
9777 ix86_frame_pointer_required (void)
9779 /* If we accessed previous frames, then the generated code expects
9780 to be able to access the saved ebp value in our frame. */
9781 if (cfun->machine->accesses_prev_frame)
9782 return true;
9784 /* Several x86 OSes need a frame pointer for other reasons,
9785 usually pertaining to setjmp. */
9786 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9787 return true;
9789 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9790 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9791 return true;
9793 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9794 stack allocation is 4GB. */
9795 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9796 return true;
9798 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9799 turns off the frame pointer by default. Turn it back on now if
9800 we've not got a leaf function. */
9801 if (TARGET_OMIT_LEAF_FRAME_POINTER
9802 && (!crtl->is_leaf
9803 || ix86_current_function_calls_tls_descriptor))
9804 return true;
9806 if (crtl->profile && !flag_fentry)
9807 return true;
9809 return false;
9812 /* Record that the current function accesses previous call frames. */
9814 void
9815 ix86_setup_frame_addresses (void)
9817 cfun->machine->accesses_prev_frame = 1;
9820 #ifndef USE_HIDDEN_LINKONCE
9821 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9822 # define USE_HIDDEN_LINKONCE 1
9823 # else
9824 # define USE_HIDDEN_LINKONCE 0
9825 # endif
9826 #endif
9828 static int pic_labels_used;
9830 /* Fills in the label name that should be used for a pc thunk for
9831 the given register. */
9833 static void
9834 get_pc_thunk_name (char name[32], unsigned int regno)
9836 gcc_assert (!TARGET_64BIT);
9838 if (USE_HIDDEN_LINKONCE)
9839 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9840 else
9841 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9845 /* This function generates the pc thunks used by -fpic: each thunk loads
9846 its destination register with the return address of the caller and returns. */
9848 static void
9849 ix86_code_end (void)
9851 rtx xops[2];
9852 int regno;
9854 for (regno = AX_REG; regno <= SP_REG; regno++)
9856 char name[32];
9857 tree decl;
9859 if (!(pic_labels_used & (1 << regno)))
9860 continue;
9862 get_pc_thunk_name (name, regno);
9864 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9865 get_identifier (name),
9866 build_function_type_list (void_type_node, NULL_TREE));
9867 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9868 NULL_TREE, void_type_node);
9869 TREE_PUBLIC (decl) = 1;
9870 TREE_STATIC (decl) = 1;
9871 DECL_IGNORED_P (decl) = 1;
9873 #if TARGET_MACHO
9874 if (TARGET_MACHO)
9876 switch_to_section (darwin_sections[text_coal_section]);
9877 fputs ("\t.weak_definition\t", asm_out_file);
9878 assemble_name (asm_out_file, name);
9879 fputs ("\n\t.private_extern\t", asm_out_file);
9880 assemble_name (asm_out_file, name);
9881 putc ('\n', asm_out_file);
9882 ASM_OUTPUT_LABEL (asm_out_file, name);
9883 DECL_WEAK (decl) = 1;
9885 else
9886 #endif
9887 if (USE_HIDDEN_LINKONCE)
9889 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9891 targetm.asm_out.unique_section (decl, 0);
9892 switch_to_section (get_named_section (decl, NULL, 0));
9894 targetm.asm_out.globalize_label (asm_out_file, name);
9895 fputs ("\t.hidden\t", asm_out_file);
9896 assemble_name (asm_out_file, name);
9897 putc ('\n', asm_out_file);
9898 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9900 else
9902 switch_to_section (text_section);
9903 ASM_OUTPUT_LABEL (asm_out_file, name);
9906 DECL_INITIAL (decl) = make_node (BLOCK);
9907 current_function_decl = decl;
9908 init_function_start (decl);
9909 first_function_block_is_cold = false;
9910 /* Make sure unwind info is emitted for the thunk if needed. */
9911 final_start_function (emit_barrier (), asm_out_file, 1);
9913 /* Pad stack IP move with 4 instructions (two NOPs count
9914 as one instruction). */
9915 if (TARGET_PAD_SHORT_FUNCTION)
9917 int i = 8;
9919 while (i--)
9920 fputs ("\tnop\n", asm_out_file);
9923 xops[0] = gen_rtx_REG (Pmode, regno);
9924 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9925 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9926 output_asm_insn ("%!ret", NULL);
9927 final_end_function ();
9928 init_insn_lengths ();
9929 free_after_compilation (cfun);
9930 set_cfun (NULL);
9931 current_function_decl = NULL;
9934 if (flag_split_stack)
9935 file_end_indicate_split_stack ();
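/* For reference, the thunk emitted above for %ebx looks like:

	__x86.get_pc_thunk.bx:
		mov	(%esp), %ebx
		ret

   i.e. it copies its own return address (the address of the insn
   following the call) into the destination register.  */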
9938 /* Emit code for the SET_GOT patterns. */
9940 const char *
9941 output_set_got (rtx dest, rtx label)
9943 rtx xops[3];
9945 xops[0] = dest;
9947 if (TARGET_VXWORKS_RTP && flag_pic)
9949 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9950 xops[2] = gen_rtx_MEM (Pmode,
9951 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9952 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9954 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9955 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9956 an unadorned address. */
9957 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9958 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9959 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9960 return "";
9963 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9965 if (!flag_pic)
9967 if (TARGET_MACHO)
9968 /* We don't need a pic base; we're not producing pic. */
9969 gcc_unreachable ();
9971 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9972 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9973 targetm.asm_out.internal_label (asm_out_file, "L",
9974 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9976 else
9978 char name[32];
9979 get_pc_thunk_name (name, REGNO (dest));
9980 pic_labels_used |= 1 << REGNO (dest);
9982 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9983 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9984 output_asm_insn ("%!call\t%X2", xops);
9986 #if TARGET_MACHO
9987 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9988 This is what will be referenced by the Mach-O PIC subsystem. */
9989 if (machopic_should_output_picbase_label () || !label)
9990 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9992 /* When we are restoring the pic base at the site of a nonlocal label,
9993 and we decided to emit the pic base above, we will still output a
9994 local label used for calculating the correction offset (even though
9995 the offset will be 0 in that case). */
9996 if (label)
9997 targetm.asm_out.internal_label (asm_out_file, "L",
9998 CODE_LABEL_NUMBER (label));
9999 #endif
10002 if (!TARGET_MACHO)
10003 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
10005 return "";
10008 /* Generate a "push" pattern for input ARG. */
10010 static rtx
10011 gen_push (rtx arg)
10013 struct machine_function *m = cfun->machine;
10015 if (m->fs.cfa_reg == stack_pointer_rtx)
10016 m->fs.cfa_offset += UNITS_PER_WORD;
10017 m->fs.sp_offset += UNITS_PER_WORD;
10019 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10020 arg = gen_rtx_REG (word_mode, REGNO (arg));
10022 return gen_rtx_SET (gen_rtx_MEM (word_mode,
10023 gen_rtx_PRE_DEC (Pmode,
10024 stack_pointer_rtx)),
10025 arg);
10028 /* Generate a "pop" pattern for input ARG. */
10030 static rtx
10031 gen_pop (rtx arg)
10033 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10034 arg = gen_rtx_REG (word_mode, REGNO (arg));
10036 return gen_rtx_SET (arg,
10037 gen_rtx_MEM (word_mode,
10038 gen_rtx_POST_INC (Pmode,
10039 stack_pointer_rtx)));
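/* For illustration, on x86-64 gen_push for %rdi yields the pattern

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))

   and gen_pop the matching post_inc load.  */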
10042 /* Return >= 0 if there is an unused call-clobbered register available
10043 for the entire function. */
10045 static unsigned int
10046 ix86_select_alt_pic_regnum (void)
10048 if (ix86_use_pseudo_pic_reg ())
10049 return INVALID_REGNUM;
10051 if (crtl->is_leaf
10052 && !crtl->profile
10053 && !ix86_current_function_calls_tls_descriptor)
10055 int i, drap;
10056 /* Can't use the same register for both PIC and DRAP. */
10057 if (crtl->drap_reg)
10058 drap = REGNO (crtl->drap_reg);
10059 else
10060 drap = -1;
10061 for (i = 2; i >= 0; --i)
10062 if (i != drap && !df_regs_ever_live_p (i))
10063 return i;
10066 return INVALID_REGNUM;
10069 /* Return TRUE if we need to save REGNO. */
10071 static bool
10072 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
10074 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
10075 && pic_offset_table_rtx)
10077 if (ix86_use_pseudo_pic_reg ())
10079 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
10080 _mcount in prologue. */
10081 if (!TARGET_64BIT && flag_pic && crtl->profile)
10082 return true;
10084 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10085 || crtl->profile
10086 || crtl->calls_eh_return
10087 || crtl->uses_const_pool
10088 || cfun->has_nonlocal_label)
10089 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
10092 if (crtl->calls_eh_return && maybe_eh_return)
10094 unsigned i;
10095 for (i = 0; ; i++)
10097 unsigned test = EH_RETURN_DATA_REGNO (i);
10098 if (test == INVALID_REGNUM)
10099 break;
10100 if (test == regno)
10101 return true;
10105 if (crtl->drap_reg
10106 && regno == REGNO (crtl->drap_reg)
10107 && !cfun->machine->no_drap_save_restore)
10108 return true;
10110 return (df_regs_ever_live_p (regno)
10111 && !call_used_regs[regno]
10112 && !fixed_regs[regno]
10113 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10116 /* Return the number of saved general-purpose registers. */
10118 static int
10119 ix86_nsaved_regs (void)
10121 int nregs = 0;
10122 int regno;
10124 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10125 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10126 nregs ++;
10127 return nregs;
10130 /* Return the number of saved SSE registers. */
10132 static int
10133 ix86_nsaved_sseregs (void)
10135 int nregs = 0;
10136 int regno;
10138 if (!TARGET_64BIT_MS_ABI)
10139 return 0;
10140 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10141 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10142 nregs ++;
10143 return nregs;
10146 /* Given FROM and TO register numbers, say whether this elimination is
10147 allowed. If stack alignment is needed, we can only replace argument
10148 pointer with hard frame pointer, or replace frame pointer with stack
10149 pointer. Otherwise, frame pointer elimination is automatically
10150 handled and all other eliminations are valid. */
10152 static bool
10153 ix86_can_eliminate (const int from, const int to)
10155 if (stack_realign_fp)
10156 return ((from == ARG_POINTER_REGNUM
10157 && to == HARD_FRAME_POINTER_REGNUM)
10158 || (from == FRAME_POINTER_REGNUM
10159 && to == STACK_POINTER_REGNUM));
10160 else
10161 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10164 /* Return the offset between two registers, one to be eliminated, and the other
10165 its replacement, at the start of a routine. */
10167 HOST_WIDE_INT
10168 ix86_initial_elimination_offset (int from, int to)
10170 struct ix86_frame frame;
10171 ix86_compute_frame_layout (&frame);
10173 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10174 return frame.hard_frame_pointer_offset;
10175 else if (from == FRAME_POINTER_REGNUM
10176 && to == HARD_FRAME_POINTER_REGNUM)
10177 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10178 else
10180 gcc_assert (to == STACK_POINTER_REGNUM);
10182 if (from == ARG_POINTER_REGNUM)
10183 return frame.stack_pointer_offset;
10185 gcc_assert (from == FRAME_POINTER_REGNUM);
10186 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10190 /* In a dynamically-aligned function, we can't know the offset from
10191 stack pointer to frame pointer, so we must ensure that setjmp
10192 eliminates fp against the hard fp (%ebp) rather than trying to
10193 index from %esp up to the top of the frame across a gap that is
10194 of unknown (at compile-time) size. */
10195 static rtx
10196 ix86_builtin_setjmp_frame_value (void)
10198 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10201 /* When using -fsplit-stack, the allocation routines set a field in
10202 the TCB to the bottom of the stack plus this much space, measured
10203 in bytes. */
10205 #define SPLIT_STACK_AVAILABLE 256
10207 /* Fill in the ix86_frame structure for the currently compiled function. */
10209 static void
10210 ix86_compute_frame_layout (struct ix86_frame *frame)
10212 unsigned HOST_WIDE_INT stack_alignment_needed;
10213 HOST_WIDE_INT offset;
10214 unsigned HOST_WIDE_INT preferred_alignment;
10215 HOST_WIDE_INT size = get_frame_size ();
10216 HOST_WIDE_INT to_allocate;
10218 frame->nregs = ix86_nsaved_regs ();
10219 frame->nsseregs = ix86_nsaved_sseregs ();
10221 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
10222 for function prologues and leaf functions. */
10223 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10224 && (!crtl->is_leaf || cfun->calls_alloca != 0
10225 || ix86_current_function_calls_tls_descriptor))
10227 crtl->preferred_stack_boundary = 128;
10228 crtl->stack_alignment_needed = 128;
10230 /* preferred_stack_boundary is never updated for calls
10231 expanded from a tls descriptor. Update it here. We don't update it in
10232 the expand stage because, according to the comments before
10233 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10234 away. */
10235 else if (ix86_current_function_calls_tls_descriptor
10236 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10238 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10239 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10240 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10243 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10244 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10246 gcc_assert (!size || stack_alignment_needed);
10247 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10248 gcc_assert (preferred_alignment <= stack_alignment_needed);
10250 /* For SEH we have to limit the amount of code movement into the prologue.
10251 At present we do this via a BLOCKAGE, at which point there's very little
10252 scheduling that can be done, which means that there's very little point
10253 in doing anything except PUSHs. */
10254 if (TARGET_SEH)
10255 cfun->machine->use_fast_prologue_epilogue = false;
10257 /* During reload iteration the number of registers saved can change.
10258 Recompute the value as needed. Do not recompute when the number of
10259 registers didn't change, as reload does multiple calls to the function
10260 and does not expect the decision to change within a single iteration. */
10261 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10262 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10264 int count = frame->nregs;
10265 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10267 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10269 /* The fast prologue uses move instead of push to save registers. This
10270 is significantly longer, but also executes faster, as modern hardware
10271 can execute the moves in parallel but can't do that for push/pop.
10273 Be careful about choosing which prologue to emit: when the function
10274 takes many instructions to execute, we may use the slow version, as
10275 well as when the function is known to be outside a hot spot (this is
10276 known with feedback only). Weight the size of the function by the
10277 number of registers to save, as it is cheap to use one or two push
10278 instructions but very slow to use many of them. */
10279 if (count)
10280 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10281 if (node->frequency < NODE_FREQUENCY_NORMAL
10282 || (flag_branch_probabilities
10283 && node->frequency < NODE_FREQUENCY_HOT))
10284 cfun->machine->use_fast_prologue_epilogue = false;
10285 else
10286 cfun->machine->use_fast_prologue_epilogue
10287 = !expensive_function_p (count);
10290 frame->save_regs_using_mov
10291 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10292 /* If static stack checking is enabled and done with probes,
10293 the registers need to be saved before allocating the frame. */
10294 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10296 /* Skip return address. */
10297 offset = UNITS_PER_WORD;
10299 /* Skip pushed static chain. */
10300 if (ix86_static_chain_on_stack)
10301 offset += UNITS_PER_WORD;
10303 /* Skip saved base pointer. */
10304 if (frame_pointer_needed)
10305 offset += UNITS_PER_WORD;
10306 frame->hfp_save_offset = offset;
10308 /* The traditional frame pointer location is at the top of the frame. */
10309 frame->hard_frame_pointer_offset = offset;
10311 /* Register save area */
10312 offset += frame->nregs * UNITS_PER_WORD;
10313 frame->reg_save_offset = offset;
10315 /* On SEH target, registers are pushed just before the frame pointer
10316 location. */
10317 if (TARGET_SEH)
10318 frame->hard_frame_pointer_offset = offset;
10320 /* Align and set SSE register save area. */
10321 if (frame->nsseregs)
10323 /* The only ABI that has saved SSE registers (Win64) also has a
10324 16-byte aligned default stack, and thus we don't need to be
10325 within the re-aligned local stack frame to save them. */
10326 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10327 offset = (offset + 16 - 1) & -16;
10328 offset += frame->nsseregs * 16;
10330 frame->sse_reg_save_offset = offset;
10332 /* The re-aligned stack starts here. Values before this point are not
10333 directly comparable with values below this point. In order to make
10334 sure that no value happens to be the same before and after, force
10335 the alignment computation below to add a non-zero value. */
10336 if (stack_realign_fp)
10337 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10339 /* Va-arg area */
10340 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10341 offset += frame->va_arg_size;
10343 /* Align start of frame for local function. */
10344 if (stack_realign_fp
10345 || offset != frame->sse_reg_save_offset
10346 || size != 0
10347 || !crtl->is_leaf
10348 || cfun->calls_alloca
10349 || ix86_current_function_calls_tls_descriptor)
10350 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10352 /* Frame pointer points here. */
10353 frame->frame_pointer_offset = offset;
10355 offset += size;
10357 /* Add the outgoing arguments area. It can be skipped if we eliminated
10358 all the function calls as dead code.
10359 Skipping is however impossible when the function calls alloca: the
10360 alloca expander assumes that the last crtl->outgoing_args_size
10361 bytes of the stack frame are unused. */
10362 if (ACCUMULATE_OUTGOING_ARGS
10363 && (!crtl->is_leaf || cfun->calls_alloca
10364 || ix86_current_function_calls_tls_descriptor))
10366 offset += crtl->outgoing_args_size;
10367 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10369 else
10370 frame->outgoing_arguments_size = 0;
10372 /* Align stack boundary. Only needed if we're calling another function
10373 or using alloca. */
10374 if (!crtl->is_leaf || cfun->calls_alloca
10375 || ix86_current_function_calls_tls_descriptor)
10376 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10378 /* We've reached end of stack frame. */
10379 frame->stack_pointer_offset = offset;
10381 /* Size prologue needs to allocate. */
10382 to_allocate = offset - frame->sse_reg_save_offset;
10384 if ((!to_allocate && frame->nregs <= 1)
10385 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10386 frame->save_regs_using_mov = false;
10388 if (ix86_using_red_zone ()
10389 && crtl->sp_is_unchanging
10390 && crtl->is_leaf
10391 && !ix86_current_function_calls_tls_descriptor)
10393 frame->red_zone_size = to_allocate;
10394 if (frame->save_regs_using_mov)
10395 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10396 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10397 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10399 else
10400 frame->red_zone_size = 0;
10401 frame->stack_pointer_offset -= frame->red_zone_size;
10403 /* The SEH frame pointer location is near the bottom of the frame.
10404 This is enforced by the fact that the difference between the
10405 stack pointer and the frame pointer is limited to 240 bytes in
10406 the unwind data structure. */
10407 if (TARGET_SEH)
10409 HOST_WIDE_INT diff;
10411 /* If we can leave the frame pointer where it is, do so. Also, this
10412 returns the establisher frame for __builtin_frame_address (0). */
10413 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10414 if (diff <= SEH_MAX_FRAME_SIZE
10415 && (diff > 240 || (diff & 15) != 0)
10416 && !crtl->accesses_prior_frames)
10418 /* Ideally we'd determine what portion of the local stack frame
10419 (within the constraint of the lowest 240) is most heavily used.
10420 But without that complication, simply bias the frame pointer
10421 by 128 bytes so as to maximize the amount of the local stack
10422 frame that is addressable with 8-bit offsets. */
10423 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10428 /* This is semi-inlined memory_address_length, but simplified
10429 since we know that we're always dealing with reg+offset, and
10430 to avoid having to create and discard all that rtl. */
10432 static inline int
10433 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10435 int len = 4;
10437 if (offset == 0)
10439 /* EBP and R13 cannot be encoded without an offset. */
10440 len = (regno == BP_REG || regno == R13_REG);
10442 else if (IN_RANGE (offset, -128, 127))
10443 len = 1;
10445 /* ESP and R12 must be encoded with a SIB byte. */
10446 if (regno == SP_REG || regno == R12_REG)
10447 len++;
10449 return len;
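/* Some sample values of the above, counting the extra address bytes
   (displacement and/or SIB) beyond the opcode and modrm:
     (AX_REG, 0)   -> 0
     (BP_REG, 0)   -> 1  ebp/r13 need at least a disp8
     (AX_REG, 8)   -> 1  disp8
     (SP_REG, 8)   -> 2  disp8 plus the mandatory SIB byte
     (AX_REG, 512) -> 4  disp32  */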
10452 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10453 The valid base registers are taken from CFUN->MACHINE->FS. */
10455 static rtx
10456 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10458 const struct machine_function *m = cfun->machine;
10459 rtx base_reg = NULL;
10460 HOST_WIDE_INT base_offset = 0;
10462 if (m->use_fast_prologue_epilogue)
10464 /* Choose the base register most likely to allow the most scheduling
10465 opportunities. Generally FP is valid throughout the function,
10466 while DRAP must be reloaded within the epilogue. But choose either
10467 over the SP due to increased encoding size. */
10469 if (m->fs.fp_valid)
10471 base_reg = hard_frame_pointer_rtx;
10472 base_offset = m->fs.fp_offset - cfa_offset;
10474 else if (m->fs.drap_valid)
10476 base_reg = crtl->drap_reg;
10477 base_offset = 0 - cfa_offset;
10479 else if (m->fs.sp_valid)
10481 base_reg = stack_pointer_rtx;
10482 base_offset = m->fs.sp_offset - cfa_offset;
10485 else
10487 HOST_WIDE_INT toffset;
10488 int len = 16, tlen;
10490 /* Choose the base register with the smallest address encoding.
10491 With a tie, choose FP > DRAP > SP. */
10492 if (m->fs.sp_valid)
10494 base_reg = stack_pointer_rtx;
10495 base_offset = m->fs.sp_offset - cfa_offset;
10496 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10498 if (m->fs.drap_valid)
10500 toffset = 0 - cfa_offset;
10501 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10502 if (tlen <= len)
10504 base_reg = crtl->drap_reg;
10505 base_offset = toffset;
10506 len = tlen;
10509 if (m->fs.fp_valid)
10511 toffset = m->fs.fp_offset - cfa_offset;
10512 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10513 if (tlen <= len)
10515 base_reg = hard_frame_pointer_rtx;
10516 base_offset = toffset;
10517 len = tlen;
10521 gcc_assert (base_reg != NULL);
10523 return plus_constant (Pmode, base_reg, base_offset);
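/* For example, in the fast prologue/epilogue case with a valid frame
   pointer and m->fs.fp_offset == 16, choose_baseaddr (8) returns
   (plus:P (reg:P bp) (const_int 8)), since base_offset is computed as
   fp_offset - cfa_offset == 8.  */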
10526 /* Emit code to save registers in the prologue. */
10528 static void
10529 ix86_emit_save_regs (void)
10531 unsigned int regno;
10532 rtx_insn *insn;
10534 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10535 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10537 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10538 RTX_FRAME_RELATED_P (insn) = 1;
10542 /* Emit a single register save at CFA - CFA_OFFSET. */
10544 static void
10545 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10546 HOST_WIDE_INT cfa_offset)
10548 struct machine_function *m = cfun->machine;
10549 rtx reg = gen_rtx_REG (mode, regno);
10550 rtx mem, addr, base, insn;
10552 addr = choose_baseaddr (cfa_offset);
10553 mem = gen_frame_mem (mode, addr);
10555 /* For SSE saves, we need to indicate the 128-bit alignment. */
10556 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10558 insn = emit_move_insn (mem, reg);
10559 RTX_FRAME_RELATED_P (insn) = 1;
10561 base = addr;
10562 if (GET_CODE (base) == PLUS)
10563 base = XEXP (base, 0);
10564 gcc_checking_assert (REG_P (base));
10566 /* When saving registers into a re-aligned local stack frame, avoid
10567 any tricky guessing by dwarf2out. */
10568 if (m->fs.realigned)
10570 gcc_checking_assert (stack_realign_drap);
10572 if (regno == REGNO (crtl->drap_reg))
10574 /* A bit of a hack. We force the DRAP register to be saved in
10575 the re-aligned stack frame, which provides us with a copy
10576 of the CFA that will last past the prologue. Install it. */
10577 gcc_checking_assert (cfun->machine->fs.fp_valid);
10578 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10579 cfun->machine->fs.fp_offset - cfa_offset);
10580 mem = gen_rtx_MEM (mode, addr);
10581 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10583 else
10585 /* The frame pointer is a stable reference within the
10586 aligned frame. Use it. */
10587 gcc_checking_assert (cfun->machine->fs.fp_valid);
10588 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10589 cfun->machine->fs.fp_offset - cfa_offset);
10590 mem = gen_rtx_MEM (mode, addr);
10591 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
10595 /* The memory may not be relative to the current CFA register,
10596 which means that we may need to generate a new pattern for
10597 use by the unwind info. */
10598 else if (base != m->fs.cfa_reg)
10600 addr = plus_constant (Pmode, m->fs.cfa_reg,
10601 m->fs.cfa_offset - cfa_offset);
10602 mem = gen_rtx_MEM (mode, addr);
10603 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
10607 /* Emit code to save registers using MOV insns.
10608 First register is stored at CFA - CFA_OFFSET. */
10609 static void
10610 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10612 unsigned int regno;
10614 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10615 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10617 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10618 cfa_offset -= UNITS_PER_WORD;
10622 /* Emit code to save SSE registers using MOV insns.
10623 First register is stored at CFA - CFA_OFFSET. */
10624 static void
10625 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10627 unsigned int regno;
10629 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10630 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10632 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10633 cfa_offset -= 16;
10637 static GTY(()) rtx queued_cfa_restores;
10639 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the
10640 next stack manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
10641 Don't add the note if the previously saved value will be left untouched
10642 within the stack red zone until return, as unwinders can find the same
10643 value in the register and on the stack.  */
10645 static void
10646 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10648 if (!crtl->shrink_wrapped
10649 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10650 return;
10652 if (insn)
10654 add_reg_note (insn, REG_CFA_RESTORE, reg);
10655 RTX_FRAME_RELATED_P (insn) = 1;
10657 else
10658 queued_cfa_restores
10659 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10662 /* Add queued REG_CFA_RESTORE notes, if any, to INSN.  */
10664 static void
10665 ix86_add_queued_cfa_restore_notes (rtx insn)
10667 rtx last;
10668 if (!queued_cfa_restores)
10669 return;
10670 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10672 XEXP (last, 1) = REG_NOTES (insn);
10673 REG_NOTES (insn) = queued_cfa_restores;
10674 queued_cfa_restores = NULL_RTX;
10675 RTX_FRAME_RELATED_P (insn) = 1;
10678 /* Expand prologue or epilogue stack adjustment.
10679 The pattern exists to put a dependency on all ebp-based memory accesses.
10680 STYLE should be negative if instructions should be marked as frame related,
10681 zero if %r11 register is live and cannot be freely used and positive
10682 otherwise. */
10684 static void
10685 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10686 int style, bool set_cfa)
10688 struct machine_function *m = cfun->machine;
10689 rtx insn;
10690 bool add_frame_related_expr = false;
10692 if (Pmode == SImode)
10693 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10694 else if (x86_64_immediate_operand (offset, DImode))
10695 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10696 else
10698 rtx tmp;
10699 /* r11 is used by indirect sibcall return as well, set before the
10700 epilogue and used after the epilogue. */
10701 if (style)
10702 tmp = gen_rtx_REG (DImode, R11_REG);
10703 else
10705 gcc_assert (src != hard_frame_pointer_rtx
10706 && dest != hard_frame_pointer_rtx);
10707 tmp = hard_frame_pointer_rtx;
10709 insn = emit_insn (gen_rtx_SET (tmp, offset));
10710 if (style < 0)
10711 add_frame_related_expr = true;
10713 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10716 insn = emit_insn (insn);
10717 if (style >= 0)
10718 ix86_add_queued_cfa_restore_notes (insn);
10720 if (set_cfa)
10722 rtx r;
10724 gcc_assert (m->fs.cfa_reg == src);
10725 m->fs.cfa_offset += INTVAL (offset);
10726 m->fs.cfa_reg = dest;
10728 r = gen_rtx_PLUS (Pmode, src, offset);
10729 r = gen_rtx_SET (dest, r);
10730 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10731 RTX_FRAME_RELATED_P (insn) = 1;
10733 else if (style < 0)
10735 RTX_FRAME_RELATED_P (insn) = 1;
10736 if (add_frame_related_expr)
10738 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10739 r = gen_rtx_SET (dest, r);
10740 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10744 if (dest == stack_pointer_rtx)
10746 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10747 bool valid = m->fs.sp_valid;
10749 if (src == hard_frame_pointer_rtx)
10751 valid = m->fs.fp_valid;
10752 ooffset = m->fs.fp_offset;
10754 else if (src == crtl->drap_reg)
10756 valid = m->fs.drap_valid;
10757 ooffset = 0;
10759 else
10761 /* Else there are two possibilities: SP itself, which we set
10762 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10763 taken care of by hand along the eh_return path.  */
10764 gcc_checking_assert (src == stack_pointer_rtx
10765 || offset == const0_rtx);
10768 m->fs.sp_offset = ooffset - INTVAL (offset);
10769 m->fs.sp_valid = valid;
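/* A typical prologue use of this helper, mirroring the realignment and
   SSE save calls later in this file, is

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-size), -1,
                                m->fs.cfa_reg == stack_pointer_rtx);

   where the negative STYLE marks the insns frame-related and SET_CFA is
   true only while the stack pointer is still the CFA register.  */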
10773 /* Find an available register to be used as the dynamic realign argument
10774 pointer register.  Such a register will be written in the prologue and
10775 used at the beginning of the body, so it must not be
10776 1. a parameter passing register.
10777 2. the GOT pointer.
10778 We reuse the static-chain register if it is available.  Otherwise, we
10779 use DI for i386 and R13 for x86-64.  We chose R13 since it has a
10780 shorter encoding.
10782 Return: the regno of chosen register. */
10784 static unsigned int
10785 find_drap_reg (void)
10787 tree decl = cfun->decl;
10789 if (TARGET_64BIT)
10791 /* Use R13 for a nested function or a function that needs a static
10792 chain.  Since a function with a tail call may use any caller-saved
10793 register in the epilogue, DRAP must not use a caller-saved
10794 register in that case.  */
10795 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10796 return R13_REG;
10798 return R10_REG;
10800 else
10802 /* Use DI for a nested function or a function that needs a static
10803 chain.  Since a function with a tail call may use any caller-saved
10804 register in the epilogue, DRAP must not use a caller-saved
10805 register in that case.  */
10806 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10807 return DI_REG;
10809 /* Reuse the static chain register if it isn't used for parameter
10810 passing.  */
10811 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10813 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10814 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10815 return CX_REG;
10817 return DI_REG;
10821 /* Return minimum incoming stack alignment. */
10823 static unsigned int
10824 ix86_minimum_incoming_stack_boundary (bool sibcall)
10826 unsigned int incoming_stack_boundary;
10828 /* Prefer the one specified at command line. */
10829 if (ix86_user_incoming_stack_boundary)
10830 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10831 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
10832 boundary when -mstackrealign is used, this isn't a sibcall check,
10833 and the estimated stack alignment is 128 bits.  */
10834 else if (!sibcall
10835 && !TARGET_64BIT
10836 && ix86_force_align_arg_pointer
10837 && crtl->stack_alignment_estimated == 128)
10838 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10839 else
10840 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10842 /* Incoming stack alignment can be changed on individual functions
10843 via the force_align_arg_pointer attribute.  We use the smallest
10844 incoming stack boundary. */
10845 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10846 && lookup_attribute (ix86_force_align_arg_pointer_string,
10847 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10848 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10850 /* The incoming stack frame has to be aligned at least at
10851 parm_stack_boundary. */
10852 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10853 incoming_stack_boundary = crtl->parm_stack_boundary;
10855 /* The stack at the entry of main is aligned by the runtime.  We use the
10856 smallest incoming stack boundary. */
10857 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10858 && DECL_NAME (current_function_decl)
10859 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10860 && DECL_FILE_SCOPE_P (current_function_decl))
10861 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10863 return incoming_stack_boundary;
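/* Note that all of the boundaries above are measured in bits: an
   incoming boundary of 128 means the caller is assumed to have aligned
   the stack to 16 bytes (the usual x86-64 psABI guarantee), while
   MIN_STACK_BOUNDARY is only the word-sized minimum.  */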
10866 /* Update incoming stack boundary and estimated stack alignment. */
10868 static void
10869 ix86_update_stack_boundary (void)
10871 ix86_incoming_stack_boundary
10872 = ix86_minimum_incoming_stack_boundary (false);
10874 /* x86_64 varargs need 16-byte stack alignment for the register save
10875 area.  */
10876 if (TARGET_64BIT
10877 && cfun->stdarg
10878 && crtl->stack_alignment_estimated < 128)
10879 crtl->stack_alignment_estimated = 128;
10882 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10883 needed or an rtx for DRAP otherwise. */
10885 static rtx
10886 ix86_get_drap_rtx (void)
10888 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10889 crtl->need_drap = true;
10891 if (stack_realign_drap)
10893 /* Assign DRAP to vDRAP and return vDRAP.  */
10894 unsigned int regno = find_drap_reg ();
10895 rtx drap_vreg;
10896 rtx arg_ptr;
10897 rtx_insn *seq, *insn;
10899 arg_ptr = gen_rtx_REG (Pmode, regno);
10900 crtl->drap_reg = arg_ptr;
10902 start_sequence ();
10903 drap_vreg = copy_to_reg (arg_ptr);
10904 seq = get_insns ();
10905 end_sequence ();
10907 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10908 if (!optimize)
10910 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10911 RTX_FRAME_RELATED_P (insn) = 1;
10913 return drap_vreg;
10915 else
10916 return NULL;
10919 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10921 static rtx
10922 ix86_internal_arg_pointer (void)
10924 return virtual_incoming_args_rtx;
10927 struct scratch_reg {
10928 rtx reg;
10929 bool saved;
10932 /* Return a short-lived scratch register for use on function entry.
10933 In 32-bit mode, it is valid only after the registers are saved
10934 in the prologue. This register must be released by means of
10935 release_scratch_register_on_entry once it is dead. */
10937 static void
10938 get_scratch_register_on_entry (struct scratch_reg *sr)
10940 int regno;
10942 sr->saved = false;
10944 if (TARGET_64BIT)
10946 /* We always use R11 in 64-bit mode. */
10947 regno = R11_REG;
10949 else
10951 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10952 bool fastcall_p
10953 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10954 bool thiscall_p
10955 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10956 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10957 int regparm = ix86_function_regparm (fntype, decl);
10958 int drap_regno
10959 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10961 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10962 for the static chain register. */
10963 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10964 && drap_regno != AX_REG)
10965 regno = AX_REG;
10966 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10967 for the static chain register. */
10968 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10969 regno = AX_REG;
10970 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10971 regno = DX_REG;
10972 /* ecx is the static chain register. */
10973 else if (regparm < 3 && !fastcall_p && !thiscall_p
10974 && !static_chain_p
10975 && drap_regno != CX_REG)
10976 regno = CX_REG;
10977 else if (ix86_save_reg (BX_REG, true))
10978 regno = BX_REG;
10979 /* esi is the static chain register. */
10980 else if (!(regparm == 3 && static_chain_p)
10981 && ix86_save_reg (SI_REG, true))
10982 regno = SI_REG;
10983 else if (ix86_save_reg (DI_REG, true))
10984 regno = DI_REG;
10985 else
10987 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10988 sr->saved = true;
10992 sr->reg = gen_rtx_REG (Pmode, regno);
10993 if (sr->saved)
10995 rtx_insn *insn = emit_insn (gen_push (sr->reg));
10996 RTX_FRAME_RELATED_P (insn) = 1;
11000 /* Release a scratch register obtained from the preceding function. */
11002 static void
11003 release_scratch_register_on_entry (struct scratch_reg *sr)
11005 if (sr->saved)
11007 struct machine_function *m = cfun->machine;
11008 rtx x, insn = emit_insn (gen_pop (sr->reg));
11010 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
11011 RTX_FRAME_RELATED_P (insn) = 1;
11012 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
11013 x = gen_rtx_SET (stack_pointer_rtx, x);
11014 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
11015 m->fs.sp_offset -= UNITS_PER_WORD;
11019 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
11021 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
11023 static void
11024 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
11026 /* We skip the probe for the first interval + a small dope of 4 words and
11027 probe that many bytes past the specified size to maintain a protection
11028 area at the bottom of the stack.  */
11029 const int dope = 4 * UNITS_PER_WORD;
11030 rtx size_rtx = GEN_INT (size), last;
11032 /* See if we have a constant small number of probes to generate. If so,
11033 that's the easy case. The run-time loop is made up of 11 insns in the
11034 generic case while the compile-time loop is made up of 3+2*(n-1) insns
11035 for n intervals.  */
11036 if (size <= 5 * PROBE_INTERVAL)
11038 HOST_WIDE_INT i, adjust;
11039 bool first_probe = true;
11041 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
11042 values of N from 1 until it exceeds SIZE. If only one probe is
11043 needed, this will not generate any code. Then adjust and probe
11044 to PROBE_INTERVAL + SIZE. */
11045 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11047 if (first_probe)
11049 adjust = 2 * PROBE_INTERVAL + dope;
11050 first_probe = false;
11052 else
11053 adjust = PROBE_INTERVAL;
11055 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11056 plus_constant (Pmode, stack_pointer_rtx,
11057 -adjust)));
11058 emit_stack_probe (stack_pointer_rtx);
11061 if (first_probe)
11062 adjust = size + PROBE_INTERVAL + dope;
11063 else
11064 adjust = size + PROBE_INTERVAL - i;
11066 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11067 plus_constant (Pmode, stack_pointer_rtx,
11068 -adjust)));
11069 emit_stack_probe (stack_pointer_rtx);
11071 /* Adjust back to account for the additional first interval. */
11072 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11073 plus_constant (Pmode, stack_pointer_rtx,
11074 PROBE_INTERVAL + dope)));
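/* A worked trace of the constant case, assuming 64-bit code with
   PROBE_INTERVAL == 4096 and hence dope == 32, for size == 10000:

     sp -= 8224  (2 * interval + dope)  probe
     sp -= 4096                         probe
     sp -= 1808  (size + interval - i)  probe
     sp += 4128  (interval + dope)

   for a net decrement of 8224 + 4096 + 1808 - 4128 == 10000 == size,
   with successive probes at most PROBE_INTERVAL apart.  */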
11077 /* Otherwise, do the same as above, but in a loop. Note that we must be
11078 extra careful with variables wrapping around because we might be at
11079 the very top (or the very bottom) of the address space and we have
11080 to be able to handle this case properly; in particular, we use an
11081 equality test for the loop condition. */
11082 else
11084 HOST_WIDE_INT rounded_size;
11085 struct scratch_reg sr;
11087 get_scratch_register_on_entry (&sr);
11090 /* Step 1: round SIZE to the previous multiple of the interval. */
11092 rounded_size = size & -PROBE_INTERVAL;
11095 /* Step 2: compute initial and final value of the loop counter. */
11097 /* SP = SP_0 + PROBE_INTERVAL. */
11098 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11099 plus_constant (Pmode, stack_pointer_rtx,
11100 - (PROBE_INTERVAL + dope))));
11102 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
11103 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
11104 emit_insn (gen_rtx_SET (sr.reg,
11105 gen_rtx_PLUS (Pmode, sr.reg,
11106 stack_pointer_rtx)));
11109 /* Step 3: the loop
11111 while (SP != LAST_ADDR)
11113 SP = SP + PROBE_INTERVAL
11114 probe at SP
11117 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11118 values of N from 1 until it is equal to ROUNDED_SIZE. */
11120 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11123 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11124 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
11126 if (size != rounded_size)
11128 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11129 plus_constant (Pmode, stack_pointer_rtx,
11130 rounded_size - size)));
11131 emit_stack_probe (stack_pointer_rtx);
11134 /* Adjust back to account for the additional first interval. */
11135 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11136 plus_constant (Pmode, stack_pointer_rtx,
11137 PROBE_INTERVAL + dope)));
11139 release_scratch_register_on_entry (&sr);
11142 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11144 /* Even if the stack pointer isn't the CFA register, we need to correctly
11145 describe the adjustments made to it, in particular differentiate the
11146 frame-related ones from the frame-unrelated ones. */
11147 if (size > 0)
11149 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11150 XVECEXP (expr, 0, 0)
11151 = gen_rtx_SET (stack_pointer_rtx,
11152 plus_constant (Pmode, stack_pointer_rtx, -size));
11153 XVECEXP (expr, 0, 1)
11154 = gen_rtx_SET (stack_pointer_rtx,
11155 plus_constant (Pmode, stack_pointer_rtx,
11156 PROBE_INTERVAL + dope + size));
11157 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11158 RTX_FRAME_RELATED_P (last) = 1;
11160 cfun->machine->fs.sp_offset += size;
11163 /* Make sure nothing is scheduled before we are done. */
11164 emit_insn (gen_blockage ());
11167 /* Adjust the stack pointer up to REG while probing it. */
11169 const char *
11170 output_adjust_stack_and_probe (rtx reg)
11172 static int labelno = 0;
11173 char loop_lab[32], end_lab[32];
11174 rtx xops[2];
11176 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11177 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11179 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11181 /* Jump to END_LAB if SP == LAST_ADDR. */
11182 xops[0] = stack_pointer_rtx;
11183 xops[1] = reg;
11184 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11185 fputs ("\tje\t", asm_out_file);
11186 assemble_name_raw (asm_out_file, end_lab);
11187 fputc ('\n', asm_out_file);
11189 /* SP = SP + PROBE_INTERVAL. */
11190 xops[1] = GEN_INT (PROBE_INTERVAL);
11191 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11193 /* Probe at SP. */
11194 xops[1] = const0_rtx;
11195 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11197 fprintf (asm_out_file, "\tjmp\t");
11198 assemble_name_raw (asm_out_file, loop_lab);
11199 fputc ('\n', asm_out_file);
11201 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11203 return "";
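/* With a 4096-byte PROBE_INTERVAL, the 64-bit output of the above is
   approximately (AT&T syntax, %r11 holding LAST_ADDR):

     .LPSRL0: cmpq %r11, %rsp
              je .LPSRE0
              subq $4096, %rsp
              orq $0, (%rsp)
              jmp .LPSRL0
     .LPSRE0:

   i.e. the stack pointer itself walks down one interval at a time,
   touching each page so the OS can extend the stack mapping.  */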
11206 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11207 inclusive. These are offsets from the current stack pointer. */
11209 static void
11210 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11212 /* See if we have a constant small number of probes to generate. If so,
11213 that's the easy case. The run-time loop is made up of 7 insns in the
11214 generic case while the compile-time loop is made up of n insns
11215 for n intervals.  */
11216 if (size <= 7 * PROBE_INTERVAL)
11218 HOST_WIDE_INT i;
11220 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11221 it exceeds SIZE. If only one probe is needed, this will not
11222 generate any code. Then probe at FIRST + SIZE. */
11223 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11224 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11225 -(first + i)));
11227 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11228 -(first + size)));
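/* For example, with first == 8192, size == 12288 and a 4096-byte
   interval, this branch emits three probes, at sp - 12288, sp - 16384
   and finally sp - 20480 (i.e. at first + size).  */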
11231 /* Otherwise, do the same as above, but in a loop. Note that we must be
11232 extra careful with variables wrapping around because we might be at
11233 the very top (or the very bottom) of the address space and we have
11234 to be able to handle this case properly; in particular, we use an
11235 equality test for the loop condition. */
11236 else
11238 HOST_WIDE_INT rounded_size, last;
11239 struct scratch_reg sr;
11241 get_scratch_register_on_entry (&sr);
11244 /* Step 1: round SIZE to the previous multiple of the interval. */
11246 rounded_size = size & -PROBE_INTERVAL;
11249 /* Step 2: compute initial and final value of the loop counter. */
11251 /* TEST_OFFSET = FIRST. */
11252 emit_move_insn (sr.reg, GEN_INT (-first));
11254 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11255 last = first + rounded_size;
11258 /* Step 3: the loop
11260 while (TEST_ADDR != LAST_ADDR)
11262 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11263 probe at TEST_ADDR
11266 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11267 until it is equal to ROUNDED_SIZE. */
11269 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11272 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11273 that SIZE is equal to ROUNDED_SIZE. */
11275 if (size != rounded_size)
11276 emit_stack_probe (plus_constant (Pmode,
11277 gen_rtx_PLUS (Pmode,
11278 stack_pointer_rtx,
11279 sr.reg),
11280 rounded_size - size));
11282 release_scratch_register_on_entry (&sr);
11285 /* Make sure nothing is scheduled before we are done. */
11286 emit_insn (gen_blockage ());
11289 /* Probe a range of stack addresses from REG to END, inclusive. These are
11290 offsets from the current stack pointer. */
11292 const char *
11293 output_probe_stack_range (rtx reg, rtx end)
11295 static int labelno = 0;
11296 char loop_lab[32], end_lab[32];
11297 rtx xops[3];
11299 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11300 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11302 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11304 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11305 xops[0] = reg;
11306 xops[1] = end;
11307 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11308 fputs ("\tje\t", asm_out_file);
11309 assemble_name_raw (asm_out_file, end_lab);
11310 fputc ('\n', asm_out_file);
11312 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11313 xops[1] = GEN_INT (PROBE_INTERVAL);
11314 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11316 /* Probe at TEST_ADDR. */
11317 xops[0] = stack_pointer_rtx;
11318 xops[1] = reg;
11319 xops[2] = const0_rtx;
11320 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11322 fprintf (asm_out_file, "\tjmp\t");
11323 assemble_name_raw (asm_out_file, loop_lab);
11324 fputc ('\n', asm_out_file);
11326 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11328 return "";
11331 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11332 to be generated in the correct form.  */
11333 static void
11334 ix86_finalize_stack_realign_flags (void)
11336 /* Check if stack realignment is really needed after reload, and
11337 store the result in cfun.  */
11338 unsigned int incoming_stack_boundary
11339 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11340 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11341 unsigned int stack_realign = (incoming_stack_boundary
11342 < (crtl->is_leaf
11343 ? crtl->max_used_stack_slot_alignment
11344 : crtl->stack_alignment_needed));
11346 if (crtl->stack_realign_finalized)
11348 /* After stack_realign_needed is finalized, we can no longer
11349 change it.  */
11350 gcc_assert (crtl->stack_realign_needed == stack_realign);
11351 return;
11354 /* If the only reason for frame_pointer_needed is that we conservatively
11355 assumed stack realignment might be needed, but in the end nothing that
11356 needed the stack alignment had been spilled, clear frame_pointer_needed
11357 and say we don't need stack realignment. */
11358 if (stack_realign
11359 && frame_pointer_needed
11360 && crtl->is_leaf
11361 && flag_omit_frame_pointer
11362 && crtl->sp_is_unchanging
11363 && !ix86_current_function_calls_tls_descriptor
11364 && !crtl->accesses_prior_frames
11365 && !cfun->calls_alloca
11366 && !crtl->calls_eh_return
11367 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11368 && !ix86_frame_pointer_required ()
11369 && get_frame_size () == 0
11370 && ix86_nsaved_sseregs () == 0
11371 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11373 HARD_REG_SET set_up_by_prologue, prologue_used;
11374 basic_block bb;
11376 CLEAR_HARD_REG_SET (prologue_used);
11377 CLEAR_HARD_REG_SET (set_up_by_prologue);
11378 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11379 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11380 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11381 HARD_FRAME_POINTER_REGNUM);
11382 FOR_EACH_BB_FN (bb, cfun)
11384 rtx_insn *insn;
11385 FOR_BB_INSNS (bb, insn)
11386 if (NONDEBUG_INSN_P (insn)
11387 && requires_stack_frame_p (insn, prologue_used,
11388 set_up_by_prologue))
11390 crtl->stack_realign_needed = stack_realign;
11391 crtl->stack_realign_finalized = true;
11392 return;
11396 /* If drap has been set, but it actually isn't live at the start
11397 of the function, there is no reason to set it up. */
11398 if (crtl->drap_reg)
11400 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11401 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11403 crtl->drap_reg = NULL_RTX;
11404 crtl->need_drap = false;
11407 else
11408 cfun->machine->no_drap_save_restore = true;
11410 frame_pointer_needed = false;
11411 stack_realign = false;
11412 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11413 crtl->stack_alignment_needed = incoming_stack_boundary;
11414 crtl->stack_alignment_estimated = incoming_stack_boundary;
11415 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11416 crtl->preferred_stack_boundary = incoming_stack_boundary;
11417 df_finish_pass (true);
11418 df_scan_alloc (NULL);
11419 df_scan_blocks ();
11420 df_compute_regs_ever_live (true);
11421 df_analyze ();
11424 crtl->stack_realign_needed = stack_realign;
11425 crtl->stack_realign_finalized = true;
11428 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11430 static void
11431 ix86_elim_entry_set_got (rtx reg)
11433 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11434 rtx_insn *c_insn = BB_HEAD (bb);
11435 if (!NONDEBUG_INSN_P (c_insn))
11436 c_insn = next_nonnote_nondebug_insn (c_insn);
11437 if (c_insn && NONJUMP_INSN_P (c_insn))
11439 rtx pat = PATTERN (c_insn);
11440 if (GET_CODE (pat) == PARALLEL)
11442 rtx vec = XVECEXP (pat, 0, 0);
11443 if (GET_CODE (vec) == SET
11444 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11445 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11446 delete_insn (c_insn);
11451 /* Expand the prologue into a bunch of separate insns. */
11453 void
11454 ix86_expand_prologue (void)
11456 struct machine_function *m = cfun->machine;
11457 rtx insn, t;
11458 struct ix86_frame frame;
11459 HOST_WIDE_INT allocate;
11460 bool int_registers_saved;
11461 bool sse_registers_saved;
11462 rtx static_chain = NULL_RTX;
11464 ix86_finalize_stack_realign_flags ();
11466 /* DRAP should not coexist with stack_realign_fp.  */
11467 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11469 memset (&m->fs, 0, sizeof (m->fs));
11471 /* Initialize CFA state for before the prologue. */
11472 m->fs.cfa_reg = stack_pointer_rtx;
11473 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11475 /* Track SP offset to the CFA. We continue tracking this after we've
11476 swapped the CFA register away from SP. In the case of re-alignment
11477 this is fudged; we're interested in offsets within the local frame.  */
11478 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11479 m->fs.sp_valid = true;
11481 ix86_compute_frame_layout (&frame);
11483 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11485 /* We should have already generated an error for any use of
11486 ms_hook on a nested function. */
11487 gcc_checking_assert (!ix86_static_chain_on_stack);
11489 /* Check if profiling is active and we shall use the variant that
11490 profiles before the prologue.  If so, sorry.  */
11491 if (crtl->profile && flag_fentry != 0)
11492 sorry ("ms_hook_prologue attribute isn%'t compatible "
11493 "with -mfentry for 32-bit");
11495 /* In ix86_asm_output_function_label we emitted:
11496 8b ff movl.s %edi,%edi
11497 55 push %ebp
11498 8b ec movl.s %esp,%ebp
11500 This matches the hookable function prologue in Win32 API
11501 functions in Microsoft Windows XP Service Pack 2 and newer.
11502 Wine uses this to enable Windows apps to hook the Win32 API
11503 functions provided by Wine.
11505 What that means is that we've already set up the frame pointer. */
11507 if (frame_pointer_needed
11508 && !(crtl->drap_reg && crtl->stack_realign_needed))
11510 rtx push, mov;
11512 /* We've decided to use the frame pointer already set up.
11513 Describe this to the unwinder by pretending that both
11514 push and mov insns happen right here.
11516 Putting the unwind info here at the end of the ms_hook
11517 is done so that we can make absolutely certain we get
11518 the required byte sequence at the start of the function,
11519 rather than relying on an assembler that can produce
11520 the exact encoding required.
11522 However it does mean (in the unpatched case) that we have
11523 a 1 insn window where the asynchronous unwind info is
11524 incorrect. However, if we placed the unwind info at
11525 its correct location we would have incorrect unwind info
11526 in the patched case. Which is probably all moot since
11527 I don't expect Wine generates dwarf2 unwind info for the
11528 system libraries that use this feature. */
11530 insn = emit_insn (gen_blockage ());
11532 push = gen_push (hard_frame_pointer_rtx);
11533 mov = gen_rtx_SET (hard_frame_pointer_rtx,
11534 stack_pointer_rtx);
11535 RTX_FRAME_RELATED_P (push) = 1;
11536 RTX_FRAME_RELATED_P (mov) = 1;
11538 RTX_FRAME_RELATED_P (insn) = 1;
11539 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11540 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11542 /* Note that gen_push incremented m->fs.cfa_offset, even
11543 though we didn't emit the push insn here. */
11544 m->fs.cfa_reg = hard_frame_pointer_rtx;
11545 m->fs.fp_offset = m->fs.cfa_offset;
11546 m->fs.fp_valid = true;
11548 else
11550 /* The frame pointer is not needed so pop %ebp again.
11551 This leaves us with a pristine state. */
11552 emit_insn (gen_pop (hard_frame_pointer_rtx));
11556 /* The first insn of a function that accepts its static chain on the
11557 stack is to push the register that would be filled in by a direct
11558 call. This insn will be skipped by the trampoline. */
11559 else if (ix86_static_chain_on_stack)
11561 static_chain = ix86_static_chain (cfun->decl, false);
11562 insn = emit_insn (gen_push (static_chain));
11563 emit_insn (gen_blockage ());
11565 /* We don't want to interpret this push insn as a register save,
11566 only as a stack adjustment. The real copy of the register as
11567 a save will be done later, if needed. */
11568 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11569 t = gen_rtx_SET (stack_pointer_rtx, t);
11570 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11571 RTX_FRAME_RELATED_P (insn) = 1;
11574 /* Emit prologue code to adjust stack alignment and set up DRAP, in
11575 case DRAP is needed and stack realignment is really needed after reload.  */
11576 if (stack_realign_drap)
11578 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11580 /* Only need to push parameter pointer reg if it is caller saved. */
11581 if (!call_used_regs[REGNO (crtl->drap_reg)])
11583 /* Push arg pointer reg */
11584 insn = emit_insn (gen_push (crtl->drap_reg));
11585 RTX_FRAME_RELATED_P (insn) = 1;
11588 /* Grab the argument pointer. */
11589 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11590 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11591 RTX_FRAME_RELATED_P (insn) = 1;
11592 m->fs.cfa_reg = crtl->drap_reg;
11593 m->fs.cfa_offset = 0;
11595 /* Align the stack. */
11596 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11597 stack_pointer_rtx,
11598 GEN_INT (-align_bytes)));
11599 RTX_FRAME_RELATED_P (insn) = 1;
11601 /* Replicate the return address on the stack so that the return
11602 address can be reached via the (argp - 1) slot.  This is needed
11603 to implement the RETURN_ADDR_RTX macro and functions such as
11604 expand_builtin_return_addr.  */
11605 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11606 t = gen_frame_mem (word_mode, t);
11607 insn = emit_insn (gen_push (t));
11608 RTX_FRAME_RELATED_P (insn) = 1;
11610 /* For the purposes of frame and register save area addressing,
11611 we've started over with a new frame. */
11612 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11613 m->fs.realigned = true;
11615 if (static_chain)
11617 /* Replicate the static chain on the stack so that it can be
11618 reached via the (argp - 2) slot.  This is needed for nested
11619 functions with stack realignment.  */
11620 insn = emit_insn (gen_push (static_chain));
11621 RTX_FRAME_RELATED_P (insn) = 1;
11625 int_registers_saved = (frame.nregs == 0);
11626 sse_registers_saved = (frame.nsseregs == 0);
11628 if (frame_pointer_needed && !m->fs.fp_valid)
11630 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11631 slower on all targets. Also sdb doesn't like it. */
11632 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11633 RTX_FRAME_RELATED_P (insn) = 1;
11635 /* Push registers now, before setting the frame pointer
11636 on SEH target. */
11637 if (!int_registers_saved
11638 && TARGET_SEH
11639 && !frame.save_regs_using_mov)
11641 ix86_emit_save_regs ();
11642 int_registers_saved = true;
11643 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11646 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11648 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11649 RTX_FRAME_RELATED_P (insn) = 1;
11651 if (m->fs.cfa_reg == stack_pointer_rtx)
11652 m->fs.cfa_reg = hard_frame_pointer_rtx;
11653 m->fs.fp_offset = m->fs.sp_offset;
11654 m->fs.fp_valid = true;
11658 if (!int_registers_saved)
11660 /* If saving registers via PUSH, do so now. */
11661 if (!frame.save_regs_using_mov)
11663 ix86_emit_save_regs ();
11664 int_registers_saved = true;
11665 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11668 /* When using the red zone we may start register saving before allocating
11669 the stack frame, saving one cycle of the prologue.  However, avoid
11670 doing this if we have to probe the stack; at least on x86_64 the
11671 stack probe can turn into a call that clobbers a red zone location. */
11672 else if (ix86_using_red_zone ()
11673 && (! TARGET_STACK_PROBE
11674 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11676 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11677 int_registers_saved = true;
11681 if (stack_realign_fp)
11683 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11684 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11686 /* The computation of the size of the re-aligned stack frame means
11687 that we must allocate the size of the register save area before
11688 performing the actual alignment. Otherwise we cannot guarantee
11689 that there's enough storage above the realignment point. */
11690 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11691 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11692 GEN_INT (m->fs.sp_offset
11693 - frame.sse_reg_save_offset),
11694 -1, false);
11696 /* Align the stack. */
11697 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11698 stack_pointer_rtx,
11699 GEN_INT (-align_bytes)));
11701 /* For the purposes of register save area addressing, the stack
11702 pointer is no longer valid. As for the value of sp_offset,
11703 see ix86_compute_frame_layout, which we need to match in order
11704 to pass verification of stack_pointer_offset at the end. */
11705 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11706 m->fs.sp_valid = false;
11709 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11711 if (flag_stack_usage_info)
11713 /* We start to count from ARG_POINTER. */
11714 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11716 /* If it was realigned, take into account the fake frame. */
11717 if (stack_realign_drap)
11719 if (ix86_static_chain_on_stack)
11720 stack_size += UNITS_PER_WORD;
11722 if (!call_used_regs[REGNO (crtl->drap_reg)])
11723 stack_size += UNITS_PER_WORD;
11725 /* This over-estimates by 1 minimal-stack-alignment-unit, but
11726 mitigates that by counting in the new return address slot.  */
11727 current_function_dynamic_stack_size
11728 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11731 current_function_static_stack_size = stack_size;
11734 /* On SEH target with very large frame size, allocate an area to save
11735 SSE registers (as the very large allocation won't be described). */
11736 if (TARGET_SEH
11737 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11738 && !sse_registers_saved)
11740 HOST_WIDE_INT sse_size =
11741 frame.sse_reg_save_offset - frame.reg_save_offset;
11743 gcc_assert (int_registers_saved);
11745 /* No need to do stack checking as the area will be immediately
11746 written. */
11747 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11748 GEN_INT (-sse_size), -1,
11749 m->fs.cfa_reg == stack_pointer_rtx);
11750 allocate -= sse_size;
11751 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11752 sse_registers_saved = true;
11755 /* The stack has already been decremented by the instruction calling us
11756 so probe if the size is non-negative to preserve the protection area. */
11757 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11759 /* We expect the registers to be saved when probes are used. */
11760 gcc_assert (int_registers_saved);
11762 if (STACK_CHECK_MOVING_SP)
11764 if (!(crtl->is_leaf && !cfun->calls_alloca
11765 && allocate <= PROBE_INTERVAL))
11767 ix86_adjust_stack_and_probe (allocate);
11768 allocate = 0;
11771 else
11773 HOST_WIDE_INT size = allocate;
11775 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11776 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11778 if (TARGET_STACK_PROBE)
11780 if (crtl->is_leaf && !cfun->calls_alloca)
11782 if (size > PROBE_INTERVAL)
11783 ix86_emit_probe_stack_range (0, size);
11785 else
11786 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11788 else
11790 if (crtl->is_leaf && !cfun->calls_alloca)
11792 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11793 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11794 size - STACK_CHECK_PROTECT);
11796 else
11797 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11802 if (allocate == 0)
11804 else if (!ix86_target_stack_probe ()
11805 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11807 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11808 GEN_INT (-allocate), -1,
11809 m->fs.cfa_reg == stack_pointer_rtx);
11811 else
11813 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11814 rtx r10 = NULL;
11815 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11816 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11817 bool eax_live = ix86_eax_live_at_start_p ();
11818 bool r10_live = false;
11820 if (TARGET_64BIT)
11821 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11823 if (eax_live)
11825 insn = emit_insn (gen_push (eax));
11826 allocate -= UNITS_PER_WORD;
11827 /* Note that SEH directives need to continue tracking the stack
11828 pointer even after the frame pointer has been set up. */
11829 if (sp_is_cfa_reg || TARGET_SEH)
11831 if (sp_is_cfa_reg)
11832 m->fs.cfa_offset += UNITS_PER_WORD;
11833 RTX_FRAME_RELATED_P (insn) = 1;
11834 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11835 gen_rtx_SET (stack_pointer_rtx,
11836 plus_constant (Pmode, stack_pointer_rtx,
11837 -UNITS_PER_WORD)));
11841 if (r10_live)
11843 r10 = gen_rtx_REG (Pmode, R10_REG);
11844 insn = emit_insn (gen_push (r10));
11845 allocate -= UNITS_PER_WORD;
11846 if (sp_is_cfa_reg || TARGET_SEH)
11848 if (sp_is_cfa_reg)
11849 m->fs.cfa_offset += UNITS_PER_WORD;
11850 RTX_FRAME_RELATED_P (insn) = 1;
11851 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11852 gen_rtx_SET (stack_pointer_rtx,
11853 plus_constant (Pmode, stack_pointer_rtx,
11854 -UNITS_PER_WORD)));
11858 emit_move_insn (eax, GEN_INT (allocate));
11859 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11861 /* Use the fact that AX still contains ALLOCATE. */
11862 adjust_stack_insn = (Pmode == DImode
11863 ? gen_pro_epilogue_adjust_stack_di_sub
11864 : gen_pro_epilogue_adjust_stack_si_sub);
11866 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11867 stack_pointer_rtx, eax));
11869 if (sp_is_cfa_reg || TARGET_SEH)
11871 if (sp_is_cfa_reg)
11872 m->fs.cfa_offset += allocate;
11873 RTX_FRAME_RELATED_P (insn) = 1;
11874 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11875 gen_rtx_SET (stack_pointer_rtx,
11876 plus_constant (Pmode, stack_pointer_rtx,
11877 -allocate)));
11879 m->fs.sp_offset += allocate;
11881 /* Use stack_pointer_rtx for relative addressing so that code
11882 works for realigned stack, too. */
11883 if (r10_live && eax_live)
11885 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11886 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11887 gen_frame_mem (word_mode, t));
11888 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11889 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11890 gen_frame_mem (word_mode, t));
11892 else if (eax_live || r10_live)
11894 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11895 emit_move_insn (gen_rtx_REG (word_mode,
11896 (eax_live ? AX_REG : R10_REG)),
11897 gen_frame_mem (word_mode, t));
11900 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
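/* The allocate_stack_worker pattern used above expands to a call to the
   target's stack-probing helper (typically ___chkstk or ___chkstk_ms on
   Windows targets), which is why live values in %eax (and %r10, when it
   carries the static chain) had to be spilled around the allocation.  */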
11902 /* If we haven't already set up the frame pointer, do so now.  */
11903 if (frame_pointer_needed && !m->fs.fp_valid)
11905 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11906 GEN_INT (frame.stack_pointer_offset
11907 - frame.hard_frame_pointer_offset));
11908 insn = emit_insn (insn);
11909 RTX_FRAME_RELATED_P (insn) = 1;
11910 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11912 if (m->fs.cfa_reg == stack_pointer_rtx)
11913 m->fs.cfa_reg = hard_frame_pointer_rtx;
11914 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11915 m->fs.fp_valid = true;
11918 if (!int_registers_saved)
11919 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11920 if (!sse_registers_saved)
11921 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11923 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11924 in the prologue.  */
11925 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11927 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11928 insn = emit_insn (gen_set_got (pic));
11929 RTX_FRAME_RELATED_P (insn) = 1;
11930 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11931 emit_insn (gen_prologue_use (pic));
11932 /* Delete an already emitted SET_GOT if it exists and is allocated to
11933 REAL_PIC_OFFSET_TABLE_REGNUM.  */
11934 ix86_elim_entry_set_got (pic);
11937 if (crtl->drap_reg && !crtl->stack_realign_needed)
11939 /* vDRAP is set up, but after reload it turns out stack realignment
11940 isn't necessary; here we emit prologue code to set up DRAP
11941 without the stack realignment adjustment.  */
11942 t = choose_baseaddr (0);
11943 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11946 /* Prevent instructions from being scheduled into the register save push
11947 sequence when access to the redzone area is done through the frame pointer.
11948 The offset between the frame pointer and the stack pointer is calculated
11949 relative to the value of the stack pointer at the end of the function
11950 prologue, and moving instructions that access redzone area via frame
11951 pointer inside push sequence violates this assumption. */
11952 if (frame_pointer_needed && frame.red_zone_size)
11953 emit_insn (gen_memory_blockage ());
11955 /* Emit cld instruction if stringops are used in the function. */
11956 if (TARGET_CLD && ix86_current_function_needs_cld)
11957 emit_insn (gen_cld ());
11959 /* SEH requires that the prologue end within 256 bytes of the start of
11960 the function. Prevent instruction schedules that would extend that.
11961 Further, prevent alloca modifications to the stack pointer from being
11962 combined with prologue modifications. */
11963 if (TARGET_SEH)
11964 emit_insn (gen_prologue_use (stack_pointer_rtx));
11967 /* Emit code to restore REG using a POP insn. */
11969 static void
11970 ix86_emit_restore_reg_using_pop (rtx reg)
11972 struct machine_function *m = cfun->machine;
11973 rtx_insn *insn = emit_insn (gen_pop (reg));
11975 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11976 m->fs.sp_offset -= UNITS_PER_WORD;
11978 if (m->fs.cfa_reg == crtl->drap_reg
11979 && REGNO (reg) == REGNO (crtl->drap_reg))
11981 /* Previously we'd represented the CFA as an expression
11982 like *(%ebp - 8). We've just popped that value from
11983 the stack, which means we need to reset the CFA to
11984 the drap register. This will remain until we restore
11985 the stack pointer. */
11986 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11987 RTX_FRAME_RELATED_P (insn) = 1;
11989 /* This means that the DRAP register is valid for addressing too. */
11990 m->fs.drap_valid = true;
11991 return;
11994 if (m->fs.cfa_reg == stack_pointer_rtx)
11996 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11997 x = gen_rtx_SET (stack_pointer_rtx, x);
11998 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11999 RTX_FRAME_RELATED_P (insn) = 1;
12001 m->fs.cfa_offset -= UNITS_PER_WORD;
12004 /* When the frame pointer is the CFA, and we pop it, we are
12005 swapping back to the stack pointer as the CFA. This happens
12006 for stack frames that don't allocate other data, so we assume
12007 the stack pointer is now pointing at the return address, i.e.
12008 the function entry state, which makes the offset be 1 word. */
12009 if (reg == hard_frame_pointer_rtx)
12011 m->fs.fp_valid = false;
12012 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
12014 m->fs.cfa_reg = stack_pointer_rtx;
12015 m->fs.cfa_offset -= UNITS_PER_WORD;
12017 add_reg_note (insn, REG_CFA_DEF_CFA,
12018 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12019 GEN_INT (m->fs.cfa_offset)));
12020 RTX_FRAME_RELATED_P (insn) = 1;
12025 /* Emit code to restore saved registers using POP insns. */
12027 static void
12028 ix86_emit_restore_regs_using_pop (void)
12030 unsigned int regno;
12032 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12033 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
12034 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
12037 /* Emit code and notes for the LEAVE instruction. */
12039 static void
12040 ix86_emit_leave (void)
12042 struct machine_function *m = cfun->machine;
12043 rtx_insn *insn = emit_insn (ix86_gen_leave ());
12045 ix86_add_queued_cfa_restore_notes (insn);
12047 gcc_assert (m->fs.fp_valid);
12048 m->fs.sp_valid = true;
12049 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
12050 m->fs.fp_valid = false;
12052 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
12054 m->fs.cfa_reg = stack_pointer_rtx;
12055 m->fs.cfa_offset = m->fs.sp_offset;
12057 add_reg_note (insn, REG_CFA_DEF_CFA,
12058 plus_constant (Pmode, stack_pointer_rtx,
12059 m->fs.sp_offset));
12060 RTX_FRAME_RELATED_P (insn) = 1;
12062 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
12063 m->fs.fp_offset);
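/* This mirrors the hardware semantics of LEAVE, which behaves like

     movq %rbp, %rsp
     popq %rbp

   so after it the stack pointer is valid again, one word above where
   the frame pointer pointed (hence sp_offset = fp_offset -
   UNITS_PER_WORD), and the frame pointer ceases to be a valid base.  */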
12066 /* Emit code to restore saved registers using MOV insns.
12067 First register is restored from CFA - CFA_OFFSET. */
12068 static void
12069 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
12070 bool maybe_eh_return)
12072 struct machine_function *m = cfun->machine;
12073 unsigned int regno;
12075 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12076 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12078 rtx reg = gen_rtx_REG (word_mode, regno);
12079 rtx mem;
12080 rtx_insn *insn;
12082 mem = choose_baseaddr (cfa_offset);
12083 mem = gen_frame_mem (word_mode, mem);
12084 insn = emit_move_insn (reg, mem);
12086 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
12088 /* Previously we'd represented the CFA as an expression
12089 like *(%ebp - 8). We've just popped that value from
12090 the stack, which means we need to reset the CFA to
12091 the drap register. This will remain until we restore
12092 the stack pointer. */
12093 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12094 RTX_FRAME_RELATED_P (insn) = 1;
12096 /* This means that the DRAP register is valid for addressing. */
12097 m->fs.drap_valid = true;
12099 else
12100 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12102 cfa_offset -= UNITS_PER_WORD;
12106 /* Emit code to restore saved registers using MOV insns.
12107 First register is restored from CFA - CFA_OFFSET. */
12108 static void
12109 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12110 bool maybe_eh_return)
12112 unsigned int regno;
12114 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12115 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12117 rtx reg = gen_rtx_REG (V4SFmode, regno);
12118 rtx mem;
12120 mem = choose_baseaddr (cfa_offset);
12121 mem = gen_rtx_MEM (V4SFmode, mem);
12122 set_mem_align (mem, 128);
12123 emit_move_insn (reg, mem);
12125 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12127 cfa_offset -= 16;
12131 /* Restore function stack, frame, and registers. */
12133 void
12134 ix86_expand_epilogue (int style)
12136 struct machine_function *m = cfun->machine;
12137 struct machine_frame_state frame_state_save = m->fs;
12138 struct ix86_frame frame;
12139 bool restore_regs_via_mov;
12140 bool using_drap;
12142 ix86_finalize_stack_realign_flags ();
12143 ix86_compute_frame_layout (&frame);
12145 m->fs.sp_valid = (!frame_pointer_needed
12146 || (crtl->sp_is_unchanging
12147 && !stack_realign_fp));
12148 gcc_assert (!m->fs.sp_valid
12149 || m->fs.sp_offset == frame.stack_pointer_offset);
12151 /* The FP must be valid if the frame pointer is present. */
12152 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12153 gcc_assert (!m->fs.fp_valid
12154 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12156 /* We must have *some* valid pointer to the stack frame. */
12157 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12159 /* The DRAP is never valid at this point. */
12160 gcc_assert (!m->fs.drap_valid);
12162 /* See the comment about red zone and frame
12163 pointer usage in ix86_expand_prologue. */
12164 if (frame_pointer_needed && frame.red_zone_size)
12165 emit_insn (gen_memory_blockage ());
12167 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12168 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12170 /* Determine the CFA offset of the end of the red-zone. */
12171 m->fs.red_zone_offset = 0;
12172 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12174 /* The red-zone begins below the return address. */
12175 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12177 /* When the register save area is in the aligned portion of
12178 the stack, determine the maximum runtime displacement that
12179 matches up with the aligned frame. */
12180 if (stack_realign_drap)
12181 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12182 + UNITS_PER_WORD);
12185 /* Special care must be taken for the normal return case of a function
12186 using eh_return: the eax and edx registers are marked as saved, but
12187 not restored along this path. Adjust the save location to match. */
12188 if (crtl->calls_eh_return && style != 2)
12189 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12191 /* EH_RETURN requires the use of moves to function properly. */
12192 if (crtl->calls_eh_return)
12193 restore_regs_via_mov = true;
12194 /* SEH requires the use of pops to identify the epilogue. */
12195 else if (TARGET_SEH)
12196 restore_regs_via_mov = false;
12197 /* If we're only restoring one register and sp is not valid, then
12198 use a move instruction to restore the register, since it's
12199 less work than reloading sp and popping the register. */
12200 else if (!m->fs.sp_valid && frame.nregs <= 1)
12201 restore_regs_via_mov = true;
12202 else if (TARGET_EPILOGUE_USING_MOVE
12203 && cfun->machine->use_fast_prologue_epilogue
12204 && (frame.nregs > 1
12205 || m->fs.sp_offset != frame.reg_save_offset))
12206 restore_regs_via_mov = true;
12207 else if (frame_pointer_needed
12208 && !frame.nregs
12209 && m->fs.sp_offset != frame.reg_save_offset)
12210 restore_regs_via_mov = true;
12211 else if (frame_pointer_needed
12212 && TARGET_USE_LEAVE
12213 && cfun->machine->use_fast_prologue_epilogue
12214 && frame.nregs == 1)
12215 restore_regs_via_mov = true;
12216 else
12217 restore_regs_via_mov = false;
12219 if (restore_regs_via_mov || frame.nsseregs)
12221 /* Ensure that the entire register save area is addressable via
12222 the stack pointer, if we will restore via sp. */
12223 if (TARGET_64BIT
12224 && m->fs.sp_offset > 0x7fffffff
12225 && !(m->fs.fp_valid || m->fs.drap_valid)
12226 && (frame.nsseregs + frame.nregs) != 0)
12228 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12229 GEN_INT (m->fs.sp_offset
12230 - frame.sse_reg_save_offset),
12231 style,
12232 m->fs.cfa_reg == stack_pointer_rtx);
12236 /* If there are any SSE registers to restore, then we have to do it
12237 via moves, since there's obviously no pop for SSE regs. */
12238 if (frame.nsseregs)
12239 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12240 style == 2);
12242 if (restore_regs_via_mov)
12244 rtx t;
12246 if (frame.nregs)
12247 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12249 /* eh_return epilogues need %ecx added to the stack pointer. */
12250 if (style == 2)
12252 rtx sa = EH_RETURN_STACKADJ_RTX;
12253 rtx_insn *insn;
12255 /* Stack align doesn't work with eh_return. */
12256 gcc_assert (!stack_realign_drap);
12257 /* Neither do regparm nested functions. */
12258 gcc_assert (!ix86_static_chain_on_stack);
12260 if (frame_pointer_needed)
12262 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12263 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12264 emit_insn (gen_rtx_SET (sa, t));
12266 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12267 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12269 /* Note that we use SA as a temporary CFA, as the return
12270 address is at the proper place relative to it. We
12271 pretend this happens at the FP restore insn because
12272 prior to this insn the FP would be stored at the wrong
12273 offset relative to SA, and after this insn we have no
12274 other reasonable register to use for the CFA. We don't
12275 bother resetting the CFA to the SP for the duration of
12276 the return insn. */
12277 add_reg_note (insn, REG_CFA_DEF_CFA,
12278 plus_constant (Pmode, sa, UNITS_PER_WORD));
12279 ix86_add_queued_cfa_restore_notes (insn);
12280 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12281 RTX_FRAME_RELATED_P (insn) = 1;
12283 m->fs.cfa_reg = sa;
12284 m->fs.cfa_offset = UNITS_PER_WORD;
12285 m->fs.fp_valid = false;
12287 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12288 const0_rtx, style, false);
12290 else
12292 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12293 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12294 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
12295 ix86_add_queued_cfa_restore_notes (insn);
12297 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12298 if (m->fs.cfa_offset != UNITS_PER_WORD)
12300 m->fs.cfa_offset = UNITS_PER_WORD;
12301 add_reg_note (insn, REG_CFA_DEF_CFA,
12302 plus_constant (Pmode, stack_pointer_rtx,
12303 UNITS_PER_WORD));
12304 RTX_FRAME_RELATED_P (insn) = 1;
12307 m->fs.sp_offset = UNITS_PER_WORD;
12308 m->fs.sp_valid = true;
12311 else
12313 /* SEH requires that the function end with (1) a stack adjustment
12314 if necessary, (2) a sequence of pops, and (3) a return or
12315 jump instruction. Prevent insns from the function body from
12316 being scheduled into this sequence. */
12317 if (TARGET_SEH)
12319 /* Prevent a catch region from being adjacent to the standard
12320 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
12321 nor several other flags that would be interesting to test
12322 are set up yet. */
12323 if (flag_non_call_exceptions)
12324 emit_insn (gen_nops (const1_rtx));
12325 else
12326 emit_insn (gen_blockage ());
12329 /* The first step is to deallocate the stack frame so that we can
12330 pop the registers. Also do it on SEH targets for very large
12331 frames, as the emitted instructions aren't allowed by the ABI
12332 in epilogues. */
12333 if (!m->fs.sp_valid
12334 || (TARGET_SEH
12335 && (m->fs.sp_offset - frame.reg_save_offset
12336 >= SEH_MAX_FRAME_SIZE)))
12338 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12339 GEN_INT (m->fs.fp_offset
12340 - frame.reg_save_offset),
12341 style, false);
12343 else if (m->fs.sp_offset != frame.reg_save_offset)
12345 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12346 GEN_INT (m->fs.sp_offset
12347 - frame.reg_save_offset),
12348 style,
12349 m->fs.cfa_reg == stack_pointer_rtx);
12352 ix86_emit_restore_regs_using_pop ();
12355 /* If we used a frame pointer and haven't already got rid of it,
12356 then do so now. */
12357 if (m->fs.fp_valid)
12359 /* If the stack pointer is valid and pointing at the frame
12360 pointer store address, then we only need a pop. */
12361 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12362 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12363 /* Leave results in shorter dependency chains on CPUs that are
12364 able to grok it fast. */
12365 else if (TARGET_USE_LEAVE
12366 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12367 || !cfun->machine->use_fast_prologue_epilogue)
12368 ix86_emit_leave ();
12369 else
12371 pro_epilogue_adjust_stack (stack_pointer_rtx,
12372 hard_frame_pointer_rtx,
12373 const0_rtx, style, !using_drap);
12374 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12378 if (using_drap)
12380 int param_ptr_offset = UNITS_PER_WORD;
12381 rtx_insn *insn;
12383 gcc_assert (stack_realign_drap);
12385 if (ix86_static_chain_on_stack)
12386 param_ptr_offset += UNITS_PER_WORD;
12387 if (!call_used_regs[REGNO (crtl->drap_reg)])
12388 param_ptr_offset += UNITS_PER_WORD;
12390 insn = emit_insn (gen_rtx_SET
12391 (stack_pointer_rtx,
12392 gen_rtx_PLUS (Pmode,
12393 crtl->drap_reg,
12394 GEN_INT (-param_ptr_offset))));
12395 m->fs.cfa_reg = stack_pointer_rtx;
12396 m->fs.cfa_offset = param_ptr_offset;
12397 m->fs.sp_offset = param_ptr_offset;
12398 m->fs.realigned = false;
12400 add_reg_note (insn, REG_CFA_DEF_CFA,
12401 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12402 GEN_INT (param_ptr_offset)));
12403 RTX_FRAME_RELATED_P (insn) = 1;
12405 if (!call_used_regs[REGNO (crtl->drap_reg)])
12406 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12409 /* At this point the stack pointer must be valid, and we must have
12410 restored all of the registers. We may not have deallocated the
12411 entire stack frame. We've delayed this until now because it may
12412 be possible to merge the local stack deallocation with the
12413 deallocation forced by ix86_static_chain_on_stack. */
12414 gcc_assert (m->fs.sp_valid);
12415 gcc_assert (!m->fs.fp_valid);
12416 gcc_assert (!m->fs.realigned);
12417 if (m->fs.sp_offset != UNITS_PER_WORD)
12419 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12420 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12421 style, true);
12423 else
12424 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12426 /* Sibcall epilogues don't want a return instruction. */
12427 if (style == 0)
12429 m->fs = frame_state_save;
12430 return;
12433 if (crtl->args.pops_args && crtl->args.size)
12435 rtx popc = GEN_INT (crtl->args.pops_args);
12437 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12438 address, do explicit add, and jump indirectly to the caller. */
12440 if (crtl->args.pops_args >= 65536)
12442 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12443 rtx_insn *insn;
12445 /* There is no "pascal" calling convention in any 64bit ABI. */
12446 gcc_assert (!TARGET_64BIT);
12448 insn = emit_insn (gen_pop (ecx));
12449 m->fs.cfa_offset -= UNITS_PER_WORD;
12450 m->fs.sp_offset -= UNITS_PER_WORD;
12452 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12453 x = gen_rtx_SET (stack_pointer_rtx, x);
12454 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12455 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
12456 RTX_FRAME_RELATED_P (insn) = 1;
12458 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12459 popc, -1, true);
12460 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12462 else
12463 emit_jump_insn (gen_simple_return_pop_internal (popc));
12465 else
12466 emit_jump_insn (gen_simple_return_internal ());
12468 /* Restore the state back to the state from the prologue,
12469 so that it's correct for the next epilogue. */
12470 m->fs = frame_state_save;
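/* A sketch of the return flavors emitted at the end of the epilogue
   above ($N stands for crtl->args.pops_args):

	# pops_args < 64K:
	ret	$N

	# pops_args >= 64K (32-bit only, as asserted above):
	popl	%ecx		# return address into %ecx
	addl	$N, %esp	# pop the arguments explicitly
	jmp	*%ecx		# return through %ecx  */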
12473 /* Reset state from the function's potential modifications,
12474 notably the PIC register number. */
12475 static void
12476 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12478 if (pic_offset_table_rtx
12479 && !ix86_use_pseudo_pic_reg ())
12480 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12481 #if TARGET_MACHO
12482 /* Mach-O doesn't support labels at the end of objects, so if
12483 it looks like we might want one, insert a NOP. */
12485 rtx_insn *insn = get_last_insn ();
12486 rtx_insn *deleted_debug_label = NULL;
12487 while (insn
12488 && NOTE_P (insn)
12489 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12491 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12492 notes only, instead set their CODE_LABEL_NUMBER to -1,
12493 otherwise there would be code generation differences
12494 between -g and -g0. */
12495 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12496 deleted_debug_label = insn;
12497 insn = PREV_INSN (insn);
12499 if (insn
12500 && (LABEL_P (insn)
12501 || (NOTE_P (insn)
12502 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12503 fputs ("\tnop\n", file);
12504 else if (deleted_debug_label)
12505 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12506 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12507 CODE_LABEL_NUMBER (insn) = -1;
12509 #endif
12513 /* Return a scratch register to use in the split stack prologue. The
12514 split stack prologue is used for -fsplit-stack. It contains the first
12515 instructions in the function, even before the regular prologue.
12516 The scratch register can be any caller-saved register which is not
12517 used for parameters or for the static chain. */
12519 static unsigned int
12520 split_stack_prologue_scratch_regno (void)
12522 if (TARGET_64BIT)
12523 return R11_REG;
12524 else
12526 bool is_fastcall, is_thiscall;
12527 int regparm;
12529 is_fastcall = (lookup_attribute ("fastcall",
12530 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12531 != NULL);
12532 is_thiscall = (lookup_attribute ("thiscall",
12533 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12534 != NULL);
12535 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12537 if (is_fastcall)
12539 if (DECL_STATIC_CHAIN (cfun->decl))
12541 sorry ("-fsplit-stack does not support fastcall with "
12542 "nested function");
12543 return INVALID_REGNUM;
12545 return AX_REG;
12547 else if (is_thiscall)
12549 if (!DECL_STATIC_CHAIN (cfun->decl))
12550 return DX_REG;
12551 return AX_REG;
12553 else if (regparm < 3)
12555 if (!DECL_STATIC_CHAIN (cfun->decl))
12556 return CX_REG;
12557 else
12559 if (regparm >= 2)
12561 sorry ("-fsplit-stack does not support 2 register "
12562 "parameters for a nested function");
12563 return INVALID_REGNUM;
12565 return DX_REG;
12568 else
12570 /* FIXME: We could make this work by pushing a register
12571 around the addition and comparison. */
12572 sorry ("-fsplit-stack does not support 3 register parameters");
12573 return INVALID_REGNUM;
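/* For example, a sketch of the choices made above for non-nested
   functions:

     64-bit			%r11
     32-bit fastcall		%eax
     32-bit thiscall		%edx
     32-bit, regparm < 3	%ecx
     32-bit, regparm == 3	sorry () is reported

   Nested functions shift the choice because the static chain
   occupies %ecx (%eax for fastcall, %edx for thiscall).  */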
12578 /* A SYMBOL_REF for the function which allocates new stack space for
12579 -fsplit-stack. */
12581 static GTY(()) rtx split_stack_fn;
12583 /* A SYMBOL_REF for the function called to get more stack when
12584 using the large model. */
12586 static GTY(()) rtx split_stack_fn_large;
12588 /* Handle -fsplit-stack. These are the first instructions in the
12589 function, even before the regular prologue. */
12591 void
12592 ix86_expand_split_stack_prologue (void)
12594 struct ix86_frame frame;
12595 HOST_WIDE_INT allocate;
12596 unsigned HOST_WIDE_INT args_size;
12597 rtx_code_label *label;
12598 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12599 rtx scratch_reg = NULL_RTX;
12600 rtx_code_label *varargs_label = NULL;
12601 rtx fn;
12603 gcc_assert (flag_split_stack && reload_completed);
12605 ix86_finalize_stack_realign_flags ();
12606 ix86_compute_frame_layout (&frame);
12607 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12609 /* This is the label we will branch to if we have enough stack
12610 space. We expect the basic block reordering pass to reverse this
12611 branch if optimizing, so that we branch in the unlikely case. */
12612 label = gen_label_rtx ();
12614 /* We need to compare the stack pointer minus the frame size with
12615 the stack boundary in the TCB. The stack boundary always gives
12616 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12617 can compare directly. Otherwise we need to do an addition. */
12619 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12620 UNSPEC_STACK_CHECK);
12621 limit = gen_rtx_CONST (Pmode, limit);
12622 limit = gen_rtx_MEM (Pmode, limit);
12623 if (allocate < SPLIT_STACK_AVAILABLE)
12624 current = stack_pointer_rtx;
12625 else
12627 unsigned int scratch_regno;
12628 rtx offset;
12630 /* We need a scratch register to hold the stack pointer minus
12631 the required frame size. Since this is the very start of the
12632 function, the scratch register can be any caller-saved
12633 register which is not used for parameters. */
12634 offset = GEN_INT (- allocate);
12635 scratch_regno = split_stack_prologue_scratch_regno ();
12636 if (scratch_regno == INVALID_REGNUM)
12637 return;
12638 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12639 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12641 /* We don't use ix86_gen_add3 in this case because it will
12642 want to split to lea, but when not optimizing the insn
12643 will not be split after this point. */
12644 emit_insn (gen_rtx_SET (scratch_reg,
12645 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12646 offset)));
12648 else
12650 emit_move_insn (scratch_reg, offset);
12651 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12652 stack_pointer_rtx));
12654 current = scratch_reg;
12657 ix86_expand_branch (GEU, current, limit, label);
12658 jump_insn = get_last_insn ();
12659 JUMP_LABEL (jump_insn) = label;
12661 /* Mark the jump as very likely to be taken. */
12662 add_int_reg_note (jump_insn, REG_BR_PROB,
12663 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12665 if (split_stack_fn == NULL_RTX)
12667 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12668 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12670 fn = split_stack_fn;
12672 /* Get more stack space. We pass in the desired stack space and the
12673 size of the arguments to copy to the new stack. In 32-bit mode
12674 we push the parameters; __morestack will return on a new stack
12675 anyhow. In 64-bit mode we pass the parameters in r10 and
12676 r11. */
12677 allocate_rtx = GEN_INT (allocate);
12678 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12679 call_fusage = NULL_RTX;
12680 if (TARGET_64BIT)
12682 rtx reg10, reg11;
12684 reg10 = gen_rtx_REG (Pmode, R10_REG);
12685 reg11 = gen_rtx_REG (Pmode, R11_REG);
12687 /* If this function uses a static chain, it will be in %r10.
12688 Preserve it across the call to __morestack. */
12689 if (DECL_STATIC_CHAIN (cfun->decl))
12691 rtx rax;
12693 rax = gen_rtx_REG (word_mode, AX_REG);
12694 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12695 use_reg (&call_fusage, rax);
12698 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12699 && !TARGET_PECOFF)
12701 HOST_WIDE_INT argval;
12703 gcc_assert (Pmode == DImode);
12704 /* When using the large model we need to load the address
12705 into a register, and we've run out of registers. So we
12706 switch to a different calling convention, and we call a
12707 different function: __morestack_large_model. We pass the
12708 argument size in the upper 32 bits of r10 and pass the
12709 frame size in the lower 32 bits. */
12710 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12711 gcc_assert ((args_size & 0xffffffff) == args_size);
12713 if (split_stack_fn_large == NULL_RTX)
12715 split_stack_fn_large =
12716 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12717 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12719 if (ix86_cmodel == CM_LARGE_PIC)
12721 rtx_code_label *label;
12722 rtx x;
12724 label = gen_label_rtx ();
12725 emit_label (label);
12726 LABEL_PRESERVE_P (label) = 1;
12727 emit_insn (gen_set_rip_rex64 (reg10, label));
12728 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12729 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12730 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12731 UNSPEC_GOT);
12732 x = gen_rtx_CONST (Pmode, x);
12733 emit_move_insn (reg11, x);
12734 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12735 x = gen_const_mem (Pmode, x);
12736 emit_move_insn (reg11, x);
12738 else
12739 emit_move_insn (reg11, split_stack_fn_large);
12741 fn = reg11;
12743 argval = ((args_size << 16) << 16) + allocate;
12744 emit_move_insn (reg10, GEN_INT (argval));
12746 else
12748 emit_move_insn (reg10, allocate_rtx);
12749 emit_move_insn (reg11, GEN_INT (args_size));
12750 use_reg (&call_fusage, reg11);
12753 use_reg (&call_fusage, reg10);
12755 else
12757 emit_insn (gen_push (GEN_INT (args_size)));
12758 emit_insn (gen_push (allocate_rtx));
12760 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12761 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12762 NULL_RTX, false);
12763 add_function_usage_to (call_insn, call_fusage);
12765 /* In order to make call/return prediction work right, we now need
12766 to execute a return instruction. See
12767 libgcc/config/i386/morestack.S for the details on how this works.
12769 For flow purposes gcc must not see this as a return
12770 instruction--we need control flow to continue at the subsequent
12771 label. Therefore, we use an unspec. */
12772 gcc_assert (crtl->args.pops_args < 65536);
12773 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12775 /* If we are in 64-bit mode and this function uses a static chain,
12776 we saved %r10 in %rax before calling __morestack. */
12777 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12778 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12779 gen_rtx_REG (word_mode, AX_REG));
12781 /* If this function calls va_start, we need to store a pointer to
12782 the arguments on the old stack, because they may not have been
12783 all copied to the new stack. At this point the old stack can be
12784 found at the frame pointer value used by __morestack, because
12785 __morestack has set that up before calling back to us. Here we
12786 store that pointer in a scratch register, and in
12787 ix86_expand_prologue we store the scratch register in a stack
12788 slot. */
12789 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12791 unsigned int scratch_regno;
12792 rtx frame_reg;
12793 int words;
12795 scratch_regno = split_stack_prologue_scratch_regno ();
12796 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12797 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12799 /* 64-bit:
12800 fp -> old fp value
12801 return address within this function
12802 return address of caller of this function
12803 stack arguments
12804 So we add three words to get to the stack arguments.
12806 32-bit:
12807 fp -> old fp value
12808 return address within this function
12809 first argument to __morestack
12810 second argument to __morestack
12811 return address of caller of this function
12812 stack arguments
12813 So we add five words to get to the stack arguments. */
12815 words = TARGET_64BIT ? 3 : 5;
12816 emit_insn (gen_rtx_SET (scratch_reg,
12817 gen_rtx_PLUS (Pmode, frame_reg,
12818 GEN_INT (words * UNITS_PER_WORD))));
12820 varargs_label = gen_label_rtx ();
12821 emit_jump_insn (gen_jump (varargs_label));
12822 JUMP_LABEL (get_last_insn ()) = varargs_label;
12824 emit_barrier ();
12827 emit_label (label);
12828 LABEL_NUSES (label) = 1;
12830 /* If this function calls va_start, we now have to set the scratch
12831 register for the case where we do not call __morestack. In this
12832 case we need to set it based on the stack pointer. */
12833 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12835 emit_insn (gen_rtx_SET (scratch_reg,
12836 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12837 GEN_INT (UNITS_PER_WORD))));
12839 emit_label (varargs_label);
12840 LABEL_NUSES (varargs_label) = 1;
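/* A sketch (assuming x86-64, small code model, no static chain and
   no va_start) of the full sequence expanded above, with FRAME, ARGS
   and the TCB OFFSET as placeholders:

	lea	-FRAME(%rsp), %r11	# sp minus frame size
	cmp	%fs:OFFSET, %r11	# stack boundary from the TCB
	jae	.Lenough		# taken in the common case
	mov	$FRAME, %r10		# frame size for __morestack
	mov	$ARGS, %r11		# incoming argument area size
	call	__morestack
	ret				# keeps call/return prediction sane
   .Lenough:

   When FRAME is below SPLIT_STACK_AVAILABLE the lea is skipped and
   %rsp is compared directly.  See libgcc/config/i386/morestack.S for
   the runtime side.  */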
12844 /* We may have to tell the dataflow pass that the split stack prologue
12845 is initializing a scratch register. */
12847 static void
12848 ix86_live_on_entry (bitmap regs)
12850 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12852 gcc_assert (flag_split_stack);
12853 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12857 /* Extract the parts of an RTL expression that is a valid memory address
12858 for an instruction. Return 0 if the structure of the address is
12859 grossly off. Return -1 if the address contains ASHIFT, so it is not
12860 strictly valid, but still used for computing the length of the lea instruction. */
12862 int
12863 ix86_decompose_address (rtx addr, struct ix86_address *out)
12865 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12866 rtx base_reg, index_reg;
12867 HOST_WIDE_INT scale = 1;
12868 rtx scale_rtx = NULL_RTX;
12869 rtx tmp;
12870 int retval = 1;
12871 enum ix86_address_seg seg = SEG_DEFAULT;
12873 /* Allow zero-extended SImode addresses,
12874 they will be emitted with addr32 prefix. */
12875 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12877 if (GET_CODE (addr) == ZERO_EXTEND
12878 && GET_MODE (XEXP (addr, 0)) == SImode)
12880 addr = XEXP (addr, 0);
12881 if (CONST_INT_P (addr))
12882 return 0;
12884 else if (GET_CODE (addr) == AND
12885 && const_32bit_mask (XEXP (addr, 1), DImode))
12887 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12888 if (addr == NULL_RTX)
12889 return 0;
12891 if (CONST_INT_P (addr))
12892 return 0;
12896 /* Allow SImode subregs of DImode addresses,
12897 they will be emitted with addr32 prefix. */
12898 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12900 if (SUBREG_P (addr)
12901 && GET_MODE (SUBREG_REG (addr)) == DImode)
12903 addr = SUBREG_REG (addr);
12904 if (CONST_INT_P (addr))
12905 return 0;
12909 if (REG_P (addr))
12910 base = addr;
12911 else if (SUBREG_P (addr))
12913 if (REG_P (SUBREG_REG (addr)))
12914 base = addr;
12915 else
12916 return 0;
12918 else if (GET_CODE (addr) == PLUS)
12920 rtx addends[4], op;
12921 int n = 0, i;
12923 op = addr;
12924 do
12926 if (n >= 4)
12927 return 0;
12928 addends[n++] = XEXP (op, 1);
12929 op = XEXP (op, 0);
12931 while (GET_CODE (op) == PLUS);
12932 if (n >= 4)
12933 return 0;
12934 addends[n] = op;
12936 for (i = n; i >= 0; --i)
12938 op = addends[i];
12939 switch (GET_CODE (op))
12941 case MULT:
12942 if (index)
12943 return 0;
12944 index = XEXP (op, 0);
12945 scale_rtx = XEXP (op, 1);
12946 break;
12948 case ASHIFT:
12949 if (index)
12950 return 0;
12951 index = XEXP (op, 0);
12952 tmp = XEXP (op, 1);
12953 if (!CONST_INT_P (tmp))
12954 return 0;
12955 scale = INTVAL (tmp);
12956 if ((unsigned HOST_WIDE_INT) scale > 3)
12957 return 0;
12958 scale = 1 << scale;
12959 break;
12961 case ZERO_EXTEND:
12962 op = XEXP (op, 0);
12963 if (GET_CODE (op) != UNSPEC)
12964 return 0;
12965 /* FALLTHRU */
12967 case UNSPEC:
12968 if (XINT (op, 1) == UNSPEC_TP
12969 && TARGET_TLS_DIRECT_SEG_REFS
12970 && seg == SEG_DEFAULT)
12971 seg = DEFAULT_TLS_SEG_REG;
12972 else
12973 return 0;
12974 break;
12976 case SUBREG:
12977 if (!REG_P (SUBREG_REG (op)))
12978 return 0;
12979 /* FALLTHRU */
12981 case REG:
12982 if (!base)
12983 base = op;
12984 else if (!index)
12985 index = op;
12986 else
12987 return 0;
12988 break;
12990 case CONST:
12991 case CONST_INT:
12992 case SYMBOL_REF:
12993 case LABEL_REF:
12994 if (disp)
12995 return 0;
12996 disp = op;
12997 break;
12999 default:
13000 return 0;
13004 else if (GET_CODE (addr) == MULT)
13006 index = XEXP (addr, 0); /* index*scale */
13007 scale_rtx = XEXP (addr, 1);
13009 else if (GET_CODE (addr) == ASHIFT)
13011 /* We're called for lea too, which implements ashift on occasion. */
13012 index = XEXP (addr, 0);
13013 tmp = XEXP (addr, 1);
13014 if (!CONST_INT_P (tmp))
13015 return 0;
13016 scale = INTVAL (tmp);
13017 if ((unsigned HOST_WIDE_INT) scale > 3)
13018 return 0;
13019 scale = 1 << scale;
13020 retval = -1;
13022 else
13023 disp = addr; /* displacement */
13025 if (index)
13027 if (REG_P (index))
13029 else if (SUBREG_P (index)
13030 && REG_P (SUBREG_REG (index)))
13032 else
13033 return 0;
13036 /* Extract the integral value of scale. */
13037 if (scale_rtx)
13039 if (!CONST_INT_P (scale_rtx))
13040 return 0;
13041 scale = INTVAL (scale_rtx);
13044 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
13045 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
13047 /* Avoid useless 0 displacement. */
13048 if (disp == const0_rtx && (base || index))
13049 disp = NULL_RTX;
13051 /* Allow arg pointer and stack pointer as index if there is no scaling. */
13052 if (base_reg && index_reg && scale == 1
13053 && (index_reg == arg_pointer_rtx
13054 || index_reg == frame_pointer_rtx
13055 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
13057 std::swap (base, index);
13058 std::swap (base_reg, index_reg);
13061 /* Special case: %ebp cannot be encoded as a base without a displacement.
13062 Similarly %r13. */
13063 if (!disp
13064 && base_reg
13065 && (base_reg == hard_frame_pointer_rtx
13066 || base_reg == frame_pointer_rtx
13067 || base_reg == arg_pointer_rtx
13068 || (REG_P (base_reg)
13069 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
13070 || REGNO (base_reg) == R13_REG))))
13071 disp = const0_rtx;
13073 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
13074 Avoid this by transforming to [%esi+0].
13075 Reload calls address legitimization without cfun defined, so we need
13076 to test cfun for being non-NULL. */
13077 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
13078 && base_reg && !index_reg && !disp
13079 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
13080 disp = const0_rtx;
13082 /* Special case: encode reg+reg instead of reg*2. */
13083 if (!base && index && scale == 2)
13084 base = index, base_reg = index_reg, scale = 1;
13086 /* Special case: scaling cannot be encoded without base or displacement. */
13087 if (!base && !disp && index && scale != 1)
13088 disp = const0_rtx;
13090 out->base = base;
13091 out->index = index;
13092 out->disp = disp;
13093 out->scale = scale;
13094 out->seg = seg;
13096 return retval;
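/* Illustrative decompositions (a sketch; RTL abbreviated):

     (reg %ebx)				base=%ebx
     (plus (reg %ebx) (const_int 8))	base=%ebx, disp=8
     (plus (plus (mult (reg %esi) (const_int 4)) (reg %ebp))
	   (const_int -16))		base=%ebp, index=%esi,
					scale=4, disp=-16
     (ashift (reg %ecx) (const_int 2))	index=%ecx, scale=4, and the
					return value is -1 since
					ASHIFT is valid for lea only.  */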
13099 /* Return cost of the memory address x.
13100 For i386, it is better to use a complex address than let gcc copy
13101 the address into a reg and make a new pseudo. But not if the address
13102 requires two regs - that would mean more pseudos with longer
13103 lifetimes. */
13104 static int
13105 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13107 struct ix86_address parts;
13108 int cost = 1;
13109 int ok = ix86_decompose_address (x, &parts);
13111 gcc_assert (ok);
13113 if (parts.base && SUBREG_P (parts.base))
13114 parts.base = SUBREG_REG (parts.base);
13115 if (parts.index && SUBREG_P (parts.index))
13116 parts.index = SUBREG_REG (parts.index);
13118 /* Attempt to minimize number of registers in the address by increasing
13119 address cost for each used register. We don't increase address cost
13120 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
13121 is not invariant itself it most likely means that base or index is not
13122 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
13123 which is not profitable for x86. */
13124 if (parts.base
13125 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13126 && (current_pass->type == GIMPLE_PASS
13127 || !pic_offset_table_rtx
13128 || !REG_P (parts.base)
13129 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13130 cost++;
13132 if (parts.index
13133 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13134 && (current_pass->type == GIMPLE_PASS
13135 || !pic_offset_table_rtx
13136 || !REG_P (parts.index)
13137 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13138 cost++;
13140 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
13141 since its predecode logic can't detect the length of instructions
13142 and it degenerates to vector decoding. Increase the cost of such
13143 addresses here. The penalty is at least 2 cycles. It may be worthwhile
13144 to split such addresses or even refuse such addresses at all.
13146 Following addressing modes are affected:
13147 [base+scale*index]
13148 [scale*index+disp]
13149 [base+index]
13151 The first and last case may be avoidable by explicitly coding the zero
13152 into the memory address, but I don't have an AMD-K6 machine handy to
13153 check this theory. */
13155 if (TARGET_K6
13156 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13157 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13158 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13159 cost += 10;
13161 return cost;
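/* For example (a sketch, ignoring the K6 penalty): an address using
   one pseudo, such as (plus (reg 90) (const_int 4)), costs 2, while
   (plus (reg 90) (mult (reg 91) (const_int 4))) costs 3 because two
   pseudos stay live across it.  Hard registers are not charged, so
   after reload most addresses cost just 1.  */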
13164 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
13165 this is used to form addresses to local data when -fPIC is in
13166 use. */
13168 static bool
13169 darwin_local_data_pic (rtx disp)
13171 return (GET_CODE (disp) == UNSPEC
13172 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13175 /* Determine if a given RTX is a valid constant. We already know this
13176 satisfies CONSTANT_P. */
13178 static bool
13179 ix86_legitimate_constant_p (machine_mode, rtx x)
13181 /* Pointer bounds constants are not valid. */
13182 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13183 return false;
13185 switch (GET_CODE (x))
13187 case CONST:
13188 x = XEXP (x, 0);
13190 if (GET_CODE (x) == PLUS)
13192 if (!CONST_INT_P (XEXP (x, 1)))
13193 return false;
13194 x = XEXP (x, 0);
13197 if (TARGET_MACHO && darwin_local_data_pic (x))
13198 return true;
13200 /* Only some unspecs are valid as "constants". */
13201 if (GET_CODE (x) == UNSPEC)
13202 switch (XINT (x, 1))
13204 case UNSPEC_GOT:
13205 case UNSPEC_GOTOFF:
13206 case UNSPEC_PLTOFF:
13207 return TARGET_64BIT;
13208 case UNSPEC_TPOFF:
13209 case UNSPEC_NTPOFF:
13210 x = XVECEXP (x, 0, 0);
13211 return (GET_CODE (x) == SYMBOL_REF
13212 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13213 case UNSPEC_DTPOFF:
13214 x = XVECEXP (x, 0, 0);
13215 return (GET_CODE (x) == SYMBOL_REF
13216 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13217 default:
13218 return false;
13221 /* We must have drilled down to a symbol. */
13222 if (GET_CODE (x) == LABEL_REF)
13223 return true;
13224 if (GET_CODE (x) != SYMBOL_REF)
13225 return false;
13226 /* FALLTHRU */
13228 case SYMBOL_REF:
13229 /* TLS symbols are never valid. */
13230 if (SYMBOL_REF_TLS_MODEL (x))
13231 return false;
13233 /* DLLIMPORT symbols are never valid. */
13234 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13235 && SYMBOL_REF_DLLIMPORT_P (x))
13236 return false;
13238 #if TARGET_MACHO
13239 /* mdynamic-no-pic */
13240 if (MACHO_DYNAMIC_NO_PIC_P)
13241 return machopic_symbol_defined_p (x);
13242 #endif
13243 break;
13245 case CONST_WIDE_INT:
13246 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13247 return false;
13248 break;
13250 case CONST_VECTOR:
13251 if (!standard_sse_constant_p (x))
13252 return false;
13254 default:
13255 break;
13258 /* Otherwise we handle everything else in the move patterns. */
13259 return true;
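/* A few examples (a sketch) of the classification above:

     (symbol_ref "printf")				valid
     (const (plus (symbol_ref "a") (const_int 4)))	valid
     (symbol_ref "v") with a TLS model			invalid
     (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF))	valid iff x is
							local-exec TLS  */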
13262 /* Determine if it's legal to put X into the constant pool. This
13263 is not possible for the address of thread-local symbols, which
13264 is checked above. */
13266 static bool
13267 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13269 /* We can always put integral constants and vectors in memory. */
13270 switch (GET_CODE (x))
13272 case CONST_INT:
13273 case CONST_WIDE_INT:
13274 case CONST_DOUBLE:
13275 case CONST_VECTOR:
13276 return false;
13278 default:
13279 break;
13281 return !ix86_legitimate_constant_p (mode, x);
13284 /* Return true if the symbol is marked as dllimport, or as a stub
13285 variable; otherwise return false. */
13287 static bool
13288 is_imported_p (rtx x)
13290 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13291 || GET_CODE (x) != SYMBOL_REF)
13292 return false;
13294 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13298 /* Nonzero if the constant value X is a legitimate general operand
13299 when generating PIC code. It is given that flag_pic is on and
13300 that X satisfies CONSTANT_P. */
13302 bool
13303 legitimate_pic_operand_p (rtx x)
13305 rtx inner;
13307 switch (GET_CODE (x))
13309 case CONST:
13310 inner = XEXP (x, 0);
13311 if (GET_CODE (inner) == PLUS
13312 && CONST_INT_P (XEXP (inner, 1)))
13313 inner = XEXP (inner, 0);
13315 /* Only some unspecs are valid as "constants". */
13316 if (GET_CODE (inner) == UNSPEC)
13317 switch (XINT (inner, 1))
13319 case UNSPEC_GOT:
13320 case UNSPEC_GOTOFF:
13321 case UNSPEC_PLTOFF:
13322 return TARGET_64BIT;
13323 case UNSPEC_TPOFF:
13324 x = XVECEXP (inner, 0, 0);
13325 return (GET_CODE (x) == SYMBOL_REF
13326 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13327 case UNSPEC_MACHOPIC_OFFSET:
13328 return legitimate_pic_address_disp_p (x);
13329 default:
13330 return false;
13332 /* FALLTHRU */
13334 case SYMBOL_REF:
13335 case LABEL_REF:
13336 return legitimate_pic_address_disp_p (x);
13338 default:
13339 return true;
13343 /* Determine if a given CONST RTX is a valid memory displacement
13344 in PIC mode. */
13346 bool
13347 legitimate_pic_address_disp_p (rtx disp)
13349 bool saw_plus;
13351 /* In 64bit mode we can allow direct addresses of symbols and labels
13352 when they are not dynamic symbols. */
13353 if (TARGET_64BIT)
13355 rtx op0 = disp, op1;
13357 switch (GET_CODE (disp))
13359 case LABEL_REF:
13360 return true;
13362 case CONST:
13363 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13364 break;
13365 op0 = XEXP (XEXP (disp, 0), 0);
13366 op1 = XEXP (XEXP (disp, 0), 1);
13367 if (!CONST_INT_P (op1)
13368 || INTVAL (op1) >= 16*1024*1024
13369 || INTVAL (op1) < -16*1024*1024)
13370 break;
13371 if (GET_CODE (op0) == LABEL_REF)
13372 return true;
13373 if (GET_CODE (op0) == CONST
13374 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13375 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13376 return true;
13377 if (GET_CODE (op0) == UNSPEC
13378 && XINT (op0, 1) == UNSPEC_PCREL)
13379 return true;
13380 if (GET_CODE (op0) != SYMBOL_REF)
13381 break;
13382 /* FALLTHRU */
13384 case SYMBOL_REF:
13385 /* TLS references should always be enclosed in an UNSPEC.
13386 A dllimported symbol always needs to be resolved. */
13387 if (SYMBOL_REF_TLS_MODEL (op0)
13388 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13389 return false;
13391 if (TARGET_PECOFF)
13393 if (is_imported_p (op0))
13394 return true;
13396 if (SYMBOL_REF_FAR_ADDR_P (op0)
13397 || !SYMBOL_REF_LOCAL_P (op0))
13398 break;
13400 /* Function symbols need to be resolved only for
13401 the large model.
13402 For the small model we don't need to resolve anything
13403 here. */
13404 if ((ix86_cmodel != CM_LARGE_PIC
13405 && SYMBOL_REF_FUNCTION_P (op0))
13406 || ix86_cmodel == CM_SMALL_PIC)
13407 return true;
13408 /* Non-external symbols don't need to be resolved for
13409 the large and medium models. */
13410 if ((ix86_cmodel == CM_LARGE_PIC
13411 || ix86_cmodel == CM_MEDIUM_PIC)
13412 && !SYMBOL_REF_EXTERNAL_P (op0))
13413 return true;
13415 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13416 && (SYMBOL_REF_LOCAL_P (op0)
13417 || (HAVE_LD_PIE_COPYRELOC
13418 && flag_pie
13419 && !SYMBOL_REF_WEAK (op0)
13420 && !SYMBOL_REF_FUNCTION_P (op0)))
13421 && ix86_cmodel != CM_LARGE_PIC)
13422 return true;
13423 break;
13425 default:
13426 break;
13429 if (GET_CODE (disp) != CONST)
13430 return false;
13431 disp = XEXP (disp, 0);
13433 if (TARGET_64BIT)
13435 /* It is unsafe to allow PLUS expressions. This limits the allowed
13436 distance of GOT tables. We should not need these anyway. */
13437 if (GET_CODE (disp) != UNSPEC
13438 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13439 && XINT (disp, 1) != UNSPEC_GOTOFF
13440 && XINT (disp, 1) != UNSPEC_PCREL
13441 && XINT (disp, 1) != UNSPEC_PLTOFF))
13442 return false;
13444 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13445 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13446 return false;
13447 return true;
13450 saw_plus = false;
13451 if (GET_CODE (disp) == PLUS)
13453 if (!CONST_INT_P (XEXP (disp, 1)))
13454 return false;
13455 disp = XEXP (disp, 0);
13456 saw_plus = true;
13459 if (TARGET_MACHO && darwin_local_data_pic (disp))
13460 return true;
13462 if (GET_CODE (disp) != UNSPEC)
13463 return false;
13465 switch (XINT (disp, 1))
13467 case UNSPEC_GOT:
13468 if (saw_plus)
13469 return false;
13470 /* We need to check for both symbols and labels because VxWorks loads
13471 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13472 details. */
13473 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13474 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13475 case UNSPEC_GOTOFF:
13476 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13477 While the ABI also specifies a 32bit relocation, we don't produce
13478 it in the small PIC model at all. */
13479 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13480 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13481 && !TARGET_64BIT)
13482 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13483 return false;
13484 case UNSPEC_GOTTPOFF:
13485 case UNSPEC_GOTNTPOFF:
13486 case UNSPEC_INDNTPOFF:
13487 if (saw_plus)
13488 return false;
13489 disp = XVECEXP (disp, 0, 0);
13490 return (GET_CODE (disp) == SYMBOL_REF
13491 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13492 case UNSPEC_NTPOFF:
13493 disp = XVECEXP (disp, 0, 0);
13494 return (GET_CODE (disp) == SYMBOL_REF
13495 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13496 case UNSPEC_DTPOFF:
13497 disp = XVECEXP (disp, 0, 0);
13498 return (GET_CODE (disp) == SYMBOL_REF
13499 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13502 return false;
13505 /* Determine if op is a suitable RTX for an address register.
13506 Return the naked register if a register or a register subreg is
13507 found, otherwise return NULL_RTX. */
13509 static rtx
13510 ix86_validate_address_register (rtx op)
13512 machine_mode mode = GET_MODE (op);
13514 /* Only SImode or DImode registers can form the address. */
13515 if (mode != SImode && mode != DImode)
13516 return NULL_RTX;
13518 if (REG_P (op))
13519 return op;
13520 else if (SUBREG_P (op))
13522 rtx reg = SUBREG_REG (op);
13524 if (!REG_P (reg))
13525 return NULL_RTX;
13527 mode = GET_MODE (reg);
13529 /* Don't allow SUBREGs that span more than a word. It can
13530 lead to spill failures when the register is one word out
13531 of a two word structure. */
13532 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13533 return NULL_RTX;
13535 /* Allow only SUBREGs of non-eliminable hard registers. */
13536 if (register_no_elim_operand (reg, mode))
13537 return reg;
13540 /* Op is not a register. */
13541 return NULL_RTX;
13544 /* Recognizes RTL expressions that are valid memory addresses for an
13545 instruction. The MODE argument is the machine mode for the MEM
13546 expression that wants to use this address.
13548 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13549 convert common non-canonical forms to canonical form so that they will
13550 be recognized. */
13552 static bool
13553 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13555 struct ix86_address parts;
13556 rtx base, index, disp;
13557 HOST_WIDE_INT scale;
13558 enum ix86_address_seg seg;
13560 if (ix86_decompose_address (addr, &parts) <= 0)
13561 /* Decomposition failed. */
13562 return false;
13564 base = parts.base;
13565 index = parts.index;
13566 disp = parts.disp;
13567 scale = parts.scale;
13568 seg = parts.seg;
13570 /* Validate base register. */
13571 if (base)
13573 rtx reg = ix86_validate_address_register (base);
13575 if (reg == NULL_RTX)
13576 return false;
13578 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13579 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13580 /* Base is not valid. */
13581 return false;
13584 /* Validate index register. */
13585 if (index)
13587 rtx reg = ix86_validate_address_register (index);
13589 if (reg == NULL_RTX)
13590 return false;
13592 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13593 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13594 /* Index is not valid. */
13595 return false;
13598 /* Index and base should have the same mode. */
13599 if (base && index
13600 && GET_MODE (base) != GET_MODE (index))
13601 return false;
13603 /* Address override works only on the (%reg) part of %fs:(%reg). */
13604 if (seg != SEG_DEFAULT
13605 && ((base && GET_MODE (base) != word_mode)
13606 || (index && GET_MODE (index) != word_mode)))
13607 return false;
13609 /* Validate scale factor. */
13610 if (scale != 1)
13612 if (!index)
13613 /* Scale without index. */
13614 return false;
13616 if (scale != 2 && scale != 4 && scale != 8)
13617 /* Scale is not a valid multiplier. */
13618 return false;
13621 /* Validate displacement. */
13622 if (disp)
13624 if (GET_CODE (disp) == CONST
13625 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13626 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13627 switch (XINT (XEXP (disp, 0), 1))
13629 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
13630 used. While the ABI also specifies 32bit relocations, we don't produce
13631 them at all and use IP relative addressing instead. */
13632 case UNSPEC_GOT:
13633 case UNSPEC_GOTOFF:
13634 gcc_assert (flag_pic);
13635 if (!TARGET_64BIT)
13636 goto is_legitimate_pic;
13638 /* 64bit address unspec. */
13639 return false;
13641 case UNSPEC_GOTPCREL:
13642 case UNSPEC_PCREL:
13643 gcc_assert (flag_pic);
13644 goto is_legitimate_pic;
13646 case UNSPEC_GOTTPOFF:
13647 case UNSPEC_GOTNTPOFF:
13648 case UNSPEC_INDNTPOFF:
13649 case UNSPEC_NTPOFF:
13650 case UNSPEC_DTPOFF:
13651 break;
13653 case UNSPEC_STACK_CHECK:
13654 gcc_assert (flag_split_stack);
13655 break;
13657 default:
13658 /* Invalid address unspec. */
13659 return false;
13662 else if (SYMBOLIC_CONST (disp)
13663 && (flag_pic
13664 || (TARGET_MACHO
13665 #if TARGET_MACHO
13666 && MACHOPIC_INDIRECT
13667 && !machopic_operand_p (disp)
13668 #endif
13672 is_legitimate_pic:
13673 if (TARGET_64BIT && (index || base))
13675 /* foo@dtpoff(%rX) is ok. */
13676 if (GET_CODE (disp) != CONST
13677 || GET_CODE (XEXP (disp, 0)) != PLUS
13678 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13679 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13680 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13681 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13682 /* Non-constant pic memory reference. */
13683 return false;
13685 else if ((!TARGET_MACHO || flag_pic)
13686 && ! legitimate_pic_address_disp_p (disp))
13687 /* Displacement is an invalid pic construct. */
13688 return false;
13689 #if TARGET_MACHO
13690 else if (MACHO_DYNAMIC_NO_PIC_P
13691 && !ix86_legitimate_constant_p (Pmode, disp))
13692 /* displacement must be referenced via non_lazy_pointer */
13693 return false;
13694 #endif
13696 /* This code used to verify that a symbolic pic displacement
13697 includes the pic_offset_table_rtx register.
13699 While this is a good idea, unfortunately these constructs may
13700 be created by the "adds using lea" optimization for incorrect
13701 code like:
13703 int a;
13704 int foo(int i)
13706 return *(&a+i);
13709 This code is nonsensical, but results in addressing the
13710 GOT table with a pic_offset_table_rtx base. We can't
13711 just refuse it easily, since it gets matched by the
13712 "addsi3" pattern, which later gets split to lea when the
13713 output register differs from the input. While this
13714 could be handled by a separate addsi pattern for this case
13715 that never results in lea, disabling this test seems to be
13716 the easier and correct fix for the crash. */
13718 else if (GET_CODE (disp) != LABEL_REF
13719 && !CONST_INT_P (disp)
13720 && (GET_CODE (disp) != CONST
13721 || !ix86_legitimate_constant_p (Pmode, disp))
13722 && (GET_CODE (disp) != SYMBOL_REF
13723 || !ix86_legitimate_constant_p (Pmode, disp)))
13724 /* Displacement is not constant. */
13725 return false;
13726 else if (TARGET_64BIT
13727 && !x86_64_immediate_operand (disp, VOIDmode))
13728 /* Displacement is out of range. */
13729 return false;
13730 /* In x32 mode, constant addresses are sign extended to 64bit, so
13731 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13732 else if (TARGET_X32 && !(index || base)
13733 && CONST_INT_P (disp)
13734 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13735 return false;
13738 /* Everything looks valid. */
13739 return true;
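/* Examples (a sketch) of the checks above:

     (plus (reg %rbp) (const_int -8))			valid
     (plus (reg %rax) (mult (reg %rbx) (const_int 8)))	valid
     (mult (reg %rbx) (const_int 3))			rejected: scale
							must be 1, 2, 4
							or 8  */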
13742 /* Determine if a given RTX is a valid constant address. */
13744 bool
13745 constant_address_p (rtx x)
13747 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13750 /* Return a unique alias set for the GOT. */
13752 static alias_set_type
13753 ix86_GOT_alias_set (void)
13755 static alias_set_type set = -1;
13756 if (set == -1)
13757 set = new_alias_set ();
13758 return set;
13761 /* Return a legitimate reference for ORIG (an address) using the
13762 register REG. If REG is 0, a new pseudo is generated.
13764 There are two types of references that must be handled:
13766 1. Global data references must load the address from the GOT, via
13767 the PIC reg. An insn is emitted to do this load, and the reg is
13768 returned.
13770 2. Static data references, constant pool addresses, and code labels
13771 compute the address as an offset from the GOT, whose base is in
13772 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13773 differentiate them from global data objects. The returned
13774 address is the PIC reg + an unspec constant.
13776 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13777 reg also appears in the address. */
13779 static rtx
13780 legitimize_pic_address (rtx orig, rtx reg)
13782 rtx addr = orig;
13783 rtx new_rtx = orig;
13785 #if TARGET_MACHO
13786 if (TARGET_MACHO && !TARGET_64BIT)
13788 if (reg == 0)
13789 reg = gen_reg_rtx (Pmode);
13790 /* Use the generic Mach-O PIC machinery. */
13791 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13793 #endif
13795 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13797 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13798 if (tmp)
13799 return tmp;
13802 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13803 new_rtx = addr;
13804 else if (TARGET_64BIT && !TARGET_PECOFF
13805 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13807 rtx tmpreg;
13808 /* This symbol may be referenced via a displacement from the PIC
13809 base address (@GOTOFF). */
13811 if (GET_CODE (addr) == CONST)
13812 addr = XEXP (addr, 0);
13813 if (GET_CODE (addr) == PLUS)
13815 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13816 UNSPEC_GOTOFF);
13817 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13819 else
13820 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13821 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13822 if (!reg)
13823 tmpreg = gen_reg_rtx (Pmode);
13824 else
13825 tmpreg = reg;
13826 emit_move_insn (tmpreg, new_rtx);
13828 if (reg != 0)
13830 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13831 tmpreg, 1, OPTAB_DIRECT);
13832 new_rtx = reg;
13834 else
13835 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13837 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13839 /* This symbol may be referenced via a displacement from the PIC
13840 base address (@GOTOFF). */
13842 if (GET_CODE (addr) == CONST)
13843 addr = XEXP (addr, 0);
13844 if (GET_CODE (addr) == PLUS)
13846 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13847 UNSPEC_GOTOFF);
13848 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13850 else
13851 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13852 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13853 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13855 if (reg != 0)
13857 emit_move_insn (reg, new_rtx);
13858 new_rtx = reg;
13861 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13862 /* We can't use @GOTOFF for text labels on VxWorks;
13863 see gotoff_operand. */
13864 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13866 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13867 if (tmp)
13868 return tmp;
13870 /* For x64 PE-COFF there is no GOT table, so we use the
13871 address directly. */
13872 if (TARGET_64BIT && TARGET_PECOFF)
13874 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13875 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13877 if (reg == 0)
13878 reg = gen_reg_rtx (Pmode);
13879 emit_move_insn (reg, new_rtx);
13880 new_rtx = reg;
13882 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13884 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13885 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13886 new_rtx = gen_const_mem (Pmode, new_rtx);
13887 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13889 if (reg == 0)
13890 reg = gen_reg_rtx (Pmode);
13891 /* Use gen_movsi directly, otherwise the address is loaded
13892 into a register for CSE. We don't want to CSE these addresses;
13893 instead we CSE addresses from the GOT table, so skip this. */
13894 emit_insn (gen_movsi (reg, new_rtx));
13895 new_rtx = reg;
13897 else
13899 /* This symbol must be referenced via a load from the
13900 Global Offset Table (@GOT). */
13902 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13903 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13904 if (TARGET_64BIT)
13905 new_rtx = force_reg (Pmode, new_rtx);
13906 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13907 new_rtx = gen_const_mem (Pmode, new_rtx);
13908 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13910 if (reg == 0)
13911 reg = gen_reg_rtx (Pmode);
13912 emit_move_insn (reg, new_rtx);
13913 new_rtx = reg;
13916 else
13918 if (CONST_INT_P (addr)
13919 && !x86_64_immediate_operand (addr, VOIDmode))
13921 if (reg)
13923 emit_move_insn (reg, addr);
13924 new_rtx = reg;
13926 else
13927 new_rtx = force_reg (Pmode, addr);
13929 else if (GET_CODE (addr) == CONST)
13931 addr = XEXP (addr, 0);
13933 /* We must match stuff we generate before. Assume the only
13934 unspecs that can get here are ours. Not that we could do
13935 anything with them anyway.... */
13936 if (GET_CODE (addr) == UNSPEC
13937 || (GET_CODE (addr) == PLUS
13938 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13939 return orig;
13940 gcc_assert (GET_CODE (addr) == PLUS);
13942 if (GET_CODE (addr) == PLUS)
13944 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13946 /* Check first to see if this is a constant offset from a @GOTOFF
13947 symbol reference. */
13948 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13949 && CONST_INT_P (op1))
13951 if (!TARGET_64BIT)
13953 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13954 UNSPEC_GOTOFF);
13955 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13956 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13957 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13959 if (reg != 0)
13961 emit_move_insn (reg, new_rtx);
13962 new_rtx = reg;
13965 else
13967 if (INTVAL (op1) < -16*1024*1024
13968 || INTVAL (op1) >= 16*1024*1024)
13970 if (!x86_64_immediate_operand (op1, Pmode))
13971 op1 = force_reg (Pmode, op1);
13972 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13976 else
13978 rtx base = legitimize_pic_address (op0, reg);
13979 machine_mode mode = GET_MODE (base);
13980 new_rtx
13981 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13983 if (CONST_INT_P (new_rtx))
13985 if (INTVAL (new_rtx) < -16*1024*1024
13986 || INTVAL (new_rtx) >= 16*1024*1024)
13988 if (!x86_64_immediate_operand (new_rtx, mode))
13989 new_rtx = force_reg (mode, new_rtx);
13990 new_rtx
13991 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13993 else
13994 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13996 else
13998 /* For %rip addressing, we have to use just disp32, with
13999 neither base nor index. */
14000 if (TARGET_64BIT
14001 && (GET_CODE (base) == SYMBOL_REF
14002 || GET_CODE (base) == LABEL_REF))
14003 base = force_reg (mode, base);
14004 if (GET_CODE (new_rtx) == PLUS
14005 && CONSTANT_P (XEXP (new_rtx, 1)))
14007 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
14008 new_rtx = XEXP (new_rtx, 1);
14010 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
14015 return new_rtx;
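/* A sketch of typical 32-bit transformations performed above, with
   %ebx standing in for pic_offset_table_rtx:

     local data:	(symbol_ref "l")
	-> (plus %ebx (const (unspec [(symbol_ref "l")] UNSPEC_GOTOFF)))

     global data:	(symbol_ref "g")
	-> (mem (plus %ebx (const (unspec [(symbol_ref "g")] UNSPEC_GOT))))

   i.e. local symbols become PIC-base-relative addresses, while global
   symbols are loaded from their GOT slot.  */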
14018 /* Load the thread pointer. If TO_REG is true, force it into a register. */
14020 static rtx
14021 get_thread_pointer (machine_mode tp_mode, bool to_reg)
14023 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
14025 if (GET_MODE (tp) != tp_mode)
14027 gcc_assert (GET_MODE (tp) == SImode);
14028 gcc_assert (tp_mode == DImode);
14030 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
14033 if (to_reg)
14034 tp = copy_to_mode_reg (tp_mode, tp);
14036 return tp;
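/* For reference (a sketch): with TARGET_TLS_DIRECT_SEG_REFS the
   UNSPEC_TP above is printed as a segment-relative access, so forcing
   the thread pointer into a register amounts to something like

	movq	%fs:0, %rax		# 64-bit Linux
	movl	%gs:0, %eax		# 32-bit Linux

   The segment register and offset are target-dependent.  */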
14039 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14041 static GTY(()) rtx ix86_tls_symbol;
14043 static rtx
14044 ix86_tls_get_addr (void)
14046 if (!ix86_tls_symbol)
14048 const char *sym
14049 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
14050 ? "___tls_get_addr" : "__tls_get_addr");
14052 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
14055 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
14057 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
14058 UNSPEC_PLTOFF);
14059 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
14060 gen_rtx_CONST (Pmode, unspec));
14063 return ix86_tls_symbol;
14066 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14068 static GTY(()) rtx ix86_tls_module_base_symbol;
14070 static rtx
14071 ix86_tls_module_base (void)
14073 if (!ix86_tls_module_base_symbol)
14075 ix86_tls_module_base_symbol
14076 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
14078 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14079 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14082 return ix86_tls_module_base_symbol;
14085 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
14086 false if we expect this to be used for a memory address and true if
14087 we expect to load the address into a register. */
14089 static rtx
14090 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
14092 rtx dest, base, off;
14093 rtx pic = NULL_RTX, tp = NULL_RTX;
14094 machine_mode tp_mode = Pmode;
14095 int type;
14097 /* Fall back to the global dynamic model if the toolchain cannot support
14098 local dynamic. */
14099 if (TARGET_SUN_TLS && !TARGET_64BIT
14100 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
14101 && model == TLS_MODEL_LOCAL_DYNAMIC)
14102 model = TLS_MODEL_GLOBAL_DYNAMIC;
14104 switch (model)
14106 case TLS_MODEL_GLOBAL_DYNAMIC:
14107 dest = gen_reg_rtx (Pmode);
14109 if (!TARGET_64BIT)
14111 if (flag_pic && !TARGET_PECOFF)
14112 pic = pic_offset_table_rtx;
14113 else
14115 pic = gen_reg_rtx (Pmode);
14116 emit_insn (gen_set_got (pic));
14120 if (TARGET_GNU2_TLS)
14122 if (TARGET_64BIT)
14123 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14124 else
14125 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14127 tp = get_thread_pointer (Pmode, true);
14128 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14130 if (GET_MODE (x) != Pmode)
14131 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14133 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14135 else
14137 rtx caddr = ix86_tls_get_addr ();
14139 if (TARGET_64BIT)
14141 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14142 rtx_insn *insns;
14144 start_sequence ();
14145 emit_call_insn
14146 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14147 insns = get_insns ();
14148 end_sequence ();
14150 if (GET_MODE (x) != Pmode)
14151 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14153 RTL_CONST_CALL_P (insns) = 1;
14154 emit_libcall_block (insns, dest, rax, x);
14156 else
14157 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14159 break;
14161 case TLS_MODEL_LOCAL_DYNAMIC:
14162 base = gen_reg_rtx (Pmode);
14164 if (!TARGET_64BIT)
14166 if (flag_pic)
14167 pic = pic_offset_table_rtx;
14168 else
14170 pic = gen_reg_rtx (Pmode);
14171 emit_insn (gen_set_got (pic));
14175 if (TARGET_GNU2_TLS)
14177 rtx tmp = ix86_tls_module_base ();
14179 if (TARGET_64BIT)
14180 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14181 else
14182 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14184 tp = get_thread_pointer (Pmode, true);
14185 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14186 gen_rtx_MINUS (Pmode, tmp, tp));
14188 else
14190 rtx caddr = ix86_tls_get_addr ();
14192 if (TARGET_64BIT)
14194 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14195 rtx_insn *insns;
14196 rtx eqv;
14198 start_sequence ();
14199 emit_call_insn
14200 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14201 insns = get_insns ();
14202 end_sequence ();
14204 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14205 share the LD_BASE result with other LD model accesses. */
14206 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14207 UNSPEC_TLS_LD_BASE);
14209 RTL_CONST_CALL_P (insns) = 1;
14210 emit_libcall_block (insns, base, rax, eqv);
14212 else
14213 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14216 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14217 off = gen_rtx_CONST (Pmode, off);
14219 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14221 if (TARGET_GNU2_TLS)
14223 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14225 if (GET_MODE (x) != Pmode)
14226 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14228 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14230 break;
14232 case TLS_MODEL_INITIAL_EXEC:
14233 if (TARGET_64BIT)
14235 if (TARGET_SUN_TLS && !TARGET_X32)
14237 /* The Sun linker took the AMD64 TLS spec literally
14238 and can only handle %rax as the destination of the
14239 initial-exec code sequence. */
14241 dest = gen_reg_rtx (DImode);
14242 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14243 return dest;
14246 /* Generate DImode references to avoid %fs:(%reg32)
14247 problems and linker IE->LE relaxation bug. */
14248 tp_mode = DImode;
14249 pic = NULL;
14250 type = UNSPEC_GOTNTPOFF;
14252 else if (flag_pic)
14254 pic = pic_offset_table_rtx;
14255 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14257 else if (!TARGET_ANY_GNU_TLS)
14259 pic = gen_reg_rtx (Pmode);
14260 emit_insn (gen_set_got (pic));
14261 type = UNSPEC_GOTTPOFF;
14263 else
14265 pic = NULL;
14266 type = UNSPEC_INDNTPOFF;
14269 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14270 off = gen_rtx_CONST (tp_mode, off);
14271 if (pic)
14272 off = gen_rtx_PLUS (tp_mode, pic, off);
14273 off = gen_const_mem (tp_mode, off);
14274 set_mem_alias_set (off, ix86_GOT_alias_set ());
14276 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14278 base = get_thread_pointer (tp_mode,
14279 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14280 off = force_reg (tp_mode, off);
14281 return gen_rtx_PLUS (tp_mode, base, off);
14283 else
14285 base = get_thread_pointer (Pmode, true);
14286 dest = gen_reg_rtx (Pmode);
14287 emit_insn (ix86_gen_sub3 (dest, base, off));
14289 break;
14291 case TLS_MODEL_LOCAL_EXEC:
14292 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14293 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14294 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14295 off = gen_rtx_CONST (Pmode, off);
14297 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14299 base = get_thread_pointer (Pmode,
14300 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14301 return gen_rtx_PLUS (Pmode, base, off);
14303 else
14305 base = get_thread_pointer (Pmode, true);
14306 dest = gen_reg_rtx (Pmode);
14307 emit_insn (ix86_gen_sub3 (dest, base, off));
14309 break;
14311 default:
14312 gcc_unreachable ();
14315 return dest;
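/* As a rough guide, the four models handled above correspond to
sequences like the following on 64-bit GNU targets (a sketch, not
the exact insn-by-insn output):

leaq x@tlsgd(%rip), %rdi        # global dynamic
call __tls_get_addr@PLT

leaq x@tlsld(%rip), %rdi        # local dynamic
call __tls_get_addr@PLT
leaq x@dtpoff(%rax), %rcx

movq x@gottpoff(%rip), %rax     # initial exec
movq %fs:(%rax), %rcx

movq %fs:x@tpoff, %rax          # local exec  */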
14318 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14319 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14320 unique refptr-DECL symbol corresponding to symbol DECL. */
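/* For example, a dllimport reference to "foo" cannot address _foo
directly; it must go through the import-table pointer slot,
roughly:

movl __imp__foo, %eax
movl (%eax), %edx

The decl created below carries that "*__imp__foo" MEM as its
DECL_RTL. */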
14322 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
14324 static inline hashval_t hash (tree_map *m) { return m->hash; }
14325 static inline bool
14326 equal (tree_map *a, tree_map *b)
14328 return a->base.from == b->base.from;
14331 static int
14332 keep_cache_entry (tree_map *&m)
14334 return ggc_marked_p (m->base.from);
14338 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14340 static tree
14341 get_dllimport_decl (tree decl, bool beimport)
14343 struct tree_map *h, in;
14344 const char *name;
14345 const char *prefix;
14346 size_t namelen, prefixlen;
14347 char *imp_name;
14348 tree to;
14349 rtx rtl;
14351 if (!dllimport_map)
14352 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14354 in.hash = htab_hash_pointer (decl);
14355 in.base.from = decl;
14356 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14357 h = *loc;
14358 if (h)
14359 return h->to;
14361 *loc = h = ggc_alloc<tree_map> ();
14362 h->hash = in.hash;
14363 h->base.from = decl;
14364 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14365 VAR_DECL, NULL, ptr_type_node);
14366 DECL_ARTIFICIAL (to) = 1;
14367 DECL_IGNORED_P (to) = 1;
14368 DECL_EXTERNAL (to) = 1;
14369 TREE_READONLY (to) = 1;
14371 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14372 name = targetm.strip_name_encoding (name);
14373 if (beimport)
14374 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14375 ? "*__imp_" : "*__imp__";
14376 else
14377 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14378 namelen = strlen (name);
14379 prefixlen = strlen (prefix);
14380 imp_name = (char *) alloca (namelen + prefixlen + 1);
14381 memcpy (imp_name, prefix, prefixlen);
14382 memcpy (imp_name + prefixlen, name, namelen + 1);
14384 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14385 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14386 SET_SYMBOL_REF_DECL (rtl, to);
14387 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14388 if (!beimport)
14390 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14391 #ifdef SUB_TARGET_RECORD_STUB
14392 SUB_TARGET_RECORD_STUB (name);
14393 #endif
14396 rtl = gen_const_mem (Pmode, rtl);
14397 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14399 SET_DECL_RTL (to, rtl);
14400 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14402 return to;
14405 /* Expand SYMBOL into its corresponding far-address symbol.
14406 WANT_REG is true if we require the result be a register. */
14408 static rtx
14409 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14411 tree imp_decl;
14412 rtx x;
14414 gcc_assert (SYMBOL_REF_DECL (symbol));
14415 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14417 x = DECL_RTL (imp_decl);
14418 if (want_reg)
14419 x = force_reg (Pmode, x);
14420 return x;
14423 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14424 true if we require the result be a register. */
14426 static rtx
14427 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14429 tree imp_decl;
14430 rtx x;
14432 gcc_assert (SYMBOL_REF_DECL (symbol));
14433 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14435 x = DECL_RTL (imp_decl);
14436 if (want_reg)
14437 x = force_reg (Pmode, x);
14438 return x;
14441 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14442 is true if we require the result be a register. */
14444 static rtx
14445 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14447 if (!TARGET_PECOFF)
14448 return NULL_RTX;
14450 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14452 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14453 return legitimize_dllimport_symbol (addr, inreg);
14454 if (GET_CODE (addr) == CONST
14455 && GET_CODE (XEXP (addr, 0)) == PLUS
14456 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14457 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14459 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14460 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14464 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14465 return NULL_RTX;
14466 if (GET_CODE (addr) == SYMBOL_REF
14467 && !is_imported_p (addr)
14468 && SYMBOL_REF_EXTERNAL_P (addr)
14469 && SYMBOL_REF_DECL (addr))
14470 return legitimize_pe_coff_extern_decl (addr, inreg);
14472 if (GET_CODE (addr) == CONST
14473 && GET_CODE (XEXP (addr, 0)) == PLUS
14474 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14475 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14476 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14477 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14479 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14480 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14482 return NULL_RTX;
14485 /* Try machine-dependent ways of modifying an illegitimate address
14486 to be legitimate. If we find one, return the new, valid address.
14487 This macro is used in only one place: `memory_address' in explow.c.
14489 OLDX is the address as it was before break_out_memory_refs was called.
14490 In some cases it is useful to look at this to decide what needs to be done.
14492 It is always safe for this macro to do nothing. It exists to recognize
14493 opportunities to optimize the output.
14495 For the 80386, we handle X+REG by loading X into a register R and
14496 using R+REG. R will go in a general reg and indexing will be used.
14497 However, if REG is a broken-out memory address or multiplication,
14498 nothing needs to be done because REG can certainly go in a general reg.
14500 When -fpic is used, special handling is needed for symbolic references.
14501 See comments by legitimize_pic_address in i386.c for details. */
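/* A sketch of the X+REG case handled below: the address
(plus (mult (reg A) (reg B)) (reg C)) has no SIB encoding, so the
MULT is copied into a fresh register R, leaving the legitimate
base+index form (plus (reg R) (reg C)). */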
14503 static rtx
14504 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14506 bool changed = false;
14507 unsigned log;
14509 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14510 if (log)
14511 return legitimize_tls_address (x, (enum tls_model) log, false);
14512 if (GET_CODE (x) == CONST
14513 && GET_CODE (XEXP (x, 0)) == PLUS
14514 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14515 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14517 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14518 (enum tls_model) log, false);
14519 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14522 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14524 rtx tmp = legitimize_pe_coff_symbol (x, true);
14525 if (tmp)
14526 return tmp;
14529 if (flag_pic && SYMBOLIC_CONST (x))
14530 return legitimize_pic_address (x, 0);
14532 #if TARGET_MACHO
14533 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14534 return machopic_indirect_data_reference (x, 0);
14535 #endif
14537 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
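/* E.g. (ashift (reg) (const_int 2)) becomes
(mult (reg) (const_int 4)), which maps directly onto the
index*scale part of a SIB address. */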
14538 if (GET_CODE (x) == ASHIFT
14539 && CONST_INT_P (XEXP (x, 1))
14540 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14542 changed = true;
14543 log = INTVAL (XEXP (x, 1));
14544 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14545 GEN_INT (1 << log));
14548 if (GET_CODE (x) == PLUS)
14550 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14552 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14553 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14554 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14556 changed = true;
14557 log = INTVAL (XEXP (XEXP (x, 0), 1));
14558 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14559 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14560 GEN_INT (1 << log));
14563 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14564 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14565 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14567 changed = true;
14568 log = INTVAL (XEXP (XEXP (x, 1), 1));
14569 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14570 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14571 GEN_INT (1 << log));
14574 /* Put multiply first if it isn't already. */
14575 if (GET_CODE (XEXP (x, 1)) == MULT)
14577 std::swap (XEXP (x, 0), XEXP (x, 1));
14578 changed = true;
14581 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14582 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14583 created by virtual register instantiation, register elimination, and
14584 similar optimizations. */
14585 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14587 changed = true;
14588 x = gen_rtx_PLUS (Pmode,
14589 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14590 XEXP (XEXP (x, 1), 0)),
14591 XEXP (XEXP (x, 1), 1));
14594 /* Canonicalize
14595 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14596 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14597 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14598 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14599 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14600 && CONSTANT_P (XEXP (x, 1)))
14602 rtx constant;
14603 rtx other = NULL_RTX;
14605 if (CONST_INT_P (XEXP (x, 1)))
14607 constant = XEXP (x, 1);
14608 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14610 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14612 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14613 other = XEXP (x, 1);
14615 else
14616 constant = 0;
14618 if (constant)
14620 changed = true;
14621 x = gen_rtx_PLUS (Pmode,
14622 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14623 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14624 plus_constant (Pmode, other,
14625 INTVAL (constant)));
14629 if (changed && ix86_legitimate_address_p (mode, x, false))
14630 return x;
14632 if (GET_CODE (XEXP (x, 0)) == MULT)
14634 changed = true;
14635 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14638 if (GET_CODE (XEXP (x, 1)) == MULT)
14640 changed = true;
14641 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14644 if (changed
14645 && REG_P (XEXP (x, 1))
14646 && REG_P (XEXP (x, 0)))
14647 return x;
14649 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14651 changed = true;
14652 x = legitimize_pic_address (x, 0);
14655 if (changed && ix86_legitimate_address_p (mode, x, false))
14656 return x;
14658 if (REG_P (XEXP (x, 0)))
14660 rtx temp = gen_reg_rtx (Pmode);
14661 rtx val = force_operand (XEXP (x, 1), temp);
14662 if (val != temp)
14664 val = convert_to_mode (Pmode, val, 1);
14665 emit_move_insn (temp, val);
14668 XEXP (x, 1) = temp;
14669 return x;
14672 else if (REG_P (XEXP (x, 1)))
14674 rtx temp = gen_reg_rtx (Pmode);
14675 rtx val = force_operand (XEXP (x, 0), temp);
14676 if (val != temp)
14678 val = convert_to_mode (Pmode, val, 1);
14679 emit_move_insn (temp, val);
14682 XEXP (x, 0) = temp;
14683 return x;
14687 return x;
14690 /* Print an integer constant expression in assembler syntax. Addition
14691 and subtraction are the only arithmetic that may appear in these
14692 expressions. FILE is the stdio stream to write to, X is the rtx, and
14693 CODE is the operand print code from the output string. */
14695 static void
14696 output_pic_addr_const (FILE *file, rtx x, int code)
14698 char buf[256];
14700 switch (GET_CODE (x))
14702 case PC:
14703 gcc_assert (flag_pic);
14704 putc ('.', file);
14705 break;
14707 case SYMBOL_REF:
14708 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14709 output_addr_const (file, x);
14710 else
14712 const char *name = XSTR (x, 0);
14714 /* Mark the decl as referenced so that cgraph will
14715 output the function. */
14716 if (SYMBOL_REF_DECL (x))
14717 mark_decl_referenced (SYMBOL_REF_DECL (x));
14719 #if TARGET_MACHO
14720 if (MACHOPIC_INDIRECT
14721 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14722 name = machopic_indirection_name (x, /*stub_p=*/true);
14723 #endif
14724 assemble_name (file, name);
14726 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14727 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14728 fputs ("@PLT", file);
14729 break;
14731 case LABEL_REF:
14732 x = XEXP (x, 0);
14733 /* FALLTHRU */
14734 case CODE_LABEL:
14735 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14736 assemble_name (asm_out_file, buf);
14737 break;
14739 case CONST_INT:
14740 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14741 break;
14743 case CONST:
14744 /* This used to output parentheses around the expression,
14745 but that does not work on the 386 (either ATT or BSD assembler). */
14746 output_pic_addr_const (file, XEXP (x, 0), code);
14747 break;
14749 case CONST_DOUBLE:
14750 /* We can't handle floating point constants;
14751 TARGET_PRINT_OPERAND must handle them. */
14752 output_operand_lossage ("floating constant misused");
14753 break;
14755 case PLUS:
14756 /* Some assemblers need integer constants to appear first. */
14757 if (CONST_INT_P (XEXP (x, 0)))
14759 output_pic_addr_const (file, XEXP (x, 0), code);
14760 putc ('+', file);
14761 output_pic_addr_const (file, XEXP (x, 1), code);
14763 else
14765 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14766 output_pic_addr_const (file, XEXP (x, 1), code);
14767 putc ('+', file);
14768 output_pic_addr_const (file, XEXP (x, 0), code);
14770 break;
14772 case MINUS:
14773 if (!TARGET_MACHO)
14774 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14775 output_pic_addr_const (file, XEXP (x, 0), code);
14776 putc ('-', file);
14777 output_pic_addr_const (file, XEXP (x, 1), code);
14778 if (!TARGET_MACHO)
14779 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14780 break;
14782 case UNSPEC:
14783 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14785 bool f = i386_asm_output_addr_const_extra (file, x);
14786 gcc_assert (f);
14787 break;
14790 gcc_assert (XVECLEN (x, 0) == 1);
14791 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14792 switch (XINT (x, 1))
14794 case UNSPEC_GOT:
14795 fputs ("@GOT", file);
14796 break;
14797 case UNSPEC_GOTOFF:
14798 fputs ("@GOTOFF", file);
14799 break;
14800 case UNSPEC_PLTOFF:
14801 fputs ("@PLTOFF", file);
14802 break;
14803 case UNSPEC_PCREL:
14804 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14805 "(%rip)" : "[rip]", file);
14806 break;
14807 case UNSPEC_GOTPCREL:
14808 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14809 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14810 break;
14811 case UNSPEC_GOTTPOFF:
14812 /* FIXME: This might be @TPOFF in Sun ld too. */
14813 fputs ("@gottpoff", file);
14814 break;
14815 case UNSPEC_TPOFF:
14816 fputs ("@tpoff", file);
14817 break;
14818 case UNSPEC_NTPOFF:
14819 if (TARGET_64BIT)
14820 fputs ("@tpoff", file);
14821 else
14822 fputs ("@ntpoff", file);
14823 break;
14824 case UNSPEC_DTPOFF:
14825 fputs ("@dtpoff", file);
14826 break;
14827 case UNSPEC_GOTNTPOFF:
14828 if (TARGET_64BIT)
14829 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14830 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14831 else
14832 fputs ("@gotntpoff", file);
14833 break;
14834 case UNSPEC_INDNTPOFF:
14835 fputs ("@indntpoff", file);
14836 break;
14837 #if TARGET_MACHO
14838 case UNSPEC_MACHOPIC_OFFSET:
14839 putc ('-', file);
14840 machopic_output_function_base_name (file);
14841 break;
14842 #endif
14843 default:
14844 output_operand_lossage ("invalid UNSPEC as operand");
14845 break;
14847 break;
14849 default:
14850 output_operand_lossage ("invalid expression as operand");
14854 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14855 We need to emit DTP-relative relocations. */
14857 static void ATTRIBUTE_UNUSED
14858 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14860 fputs (ASM_LONG, file);
14861 output_addr_const (file, x);
14862 fputs ("@dtpoff", file);
14863 switch (size)
14865 case 4:
14866 break;
14867 case 8:
14868 fputs (", 0", file);
14869 break;
14870 default:
14871 gcc_unreachable ();
14875 /* Return true if X is a representation of the PIC register. This copes
14876 with calls from ix86_find_base_term, where the register might have
14877 been replaced by a cselib value. */
14879 static bool
14880 ix86_pic_register_p (rtx x)
14882 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14883 return (pic_offset_table_rtx
14884 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14885 else if (!REG_P (x))
14886 return false;
14887 else if (pic_offset_table_rtx)
14889 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14890 return true;
14891 if (HARD_REGISTER_P (x)
14892 && !HARD_REGISTER_P (pic_offset_table_rtx)
14893 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14894 return true;
14895 return false;
14897 else
14898 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14901 /* Helper function for ix86_delegitimize_address.
14902 Attempt to delegitimize TLS local-exec accesses. */
14904 static rtx
14905 ix86_delegitimize_tls_address (rtx orig_x)
14907 rtx x = orig_x, unspec;
14908 struct ix86_address addr;
14910 if (!TARGET_TLS_DIRECT_SEG_REFS)
14911 return orig_x;
14912 if (MEM_P (x))
14913 x = XEXP (x, 0);
14914 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14915 return orig_x;
14916 if (ix86_decompose_address (x, &addr) == 0
14917 || addr.seg != DEFAULT_TLS_SEG_REG
14918 || addr.disp == NULL_RTX
14919 || GET_CODE (addr.disp) != CONST)
14920 return orig_x;
14921 unspec = XEXP (addr.disp, 0);
14922 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14923 unspec = XEXP (unspec, 0);
14924 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14925 return orig_x;
14926 x = XVECEXP (unspec, 0, 0);
14927 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14928 if (unspec != XEXP (addr.disp, 0))
14929 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14930 if (addr.index)
14932 rtx idx = addr.index;
14933 if (addr.scale != 1)
14934 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14935 x = gen_rtx_PLUS (Pmode, idx, x);
14937 if (addr.base)
14938 x = gen_rtx_PLUS (Pmode, addr.base, x);
14939 if (MEM_P (orig_x))
14940 x = replace_equiv_address_nv (orig_x, x);
14941 return x;
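/* E.g. a local-exec access through the TLS segment whose displacement
is (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)) comes back out
as (symbol_ref "x"), with any base and scaled index re-attached. */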
14944 /* In the name of slightly smaller debug output, and to cater to
14945 general assembler lossage, recognize PIC+GOTOFF and turn it back
14946 into a direct symbol reference.
14948 On Darwin, this is necessary to avoid a crash, because Darwin
14949 has a different PIC label for each routine but the DWARF debugging
14950 information is not associated with any particular routine, so it's
14951 necessary to remove references to the PIC label from RTL stored by
14952 the DWARF output code. */
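/* For example, a 64-bit GOT load such as
(mem (const:DI (unspec:DI [(symbol_ref "foo")] UNSPEC_GOTPCREL)))
i.e. "movq foo@GOTPCREL(%rip), %reg", is reported back to the
debug machinery as plain (symbol_ref "foo") by the code below. */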
14954 static rtx
14955 ix86_delegitimize_address (rtx x)
14957 rtx orig_x = delegitimize_mem_from_attrs (x);
14958 /* addend is NULL or some rtx if x is something+GOTOFF where
14959 something doesn't include the PIC register. */
14960 rtx addend = NULL_RTX;
14961 /* reg_addend is NULL or a multiple of some register. */
14962 rtx reg_addend = NULL_RTX;
14963 /* const_addend is NULL or a const_int. */
14964 rtx const_addend = NULL_RTX;
14965 /* This is the result, or NULL. */
14966 rtx result = NULL_RTX;
14968 x = orig_x;
14970 if (MEM_P (x))
14971 x = XEXP (x, 0);
14973 if (TARGET_64BIT)
14975 if (GET_CODE (x) == CONST
14976 && GET_CODE (XEXP (x, 0)) == PLUS
14977 && GET_MODE (XEXP (x, 0)) == Pmode
14978 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14979 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14980 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14982 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14983 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14984 if (MEM_P (orig_x))
14985 x = replace_equiv_address_nv (orig_x, x);
14986 return x;
14989 if (GET_CODE (x) == CONST
14990 && GET_CODE (XEXP (x, 0)) == UNSPEC
14991 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14992 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14993 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14995 x = XVECEXP (XEXP (x, 0), 0, 0);
14996 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14998 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14999 GET_MODE (x), 0);
15000 if (x == NULL_RTX)
15001 return orig_x;
15003 return x;
15006 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
15007 return ix86_delegitimize_tls_address (orig_x);
15009 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
15010 and -mcmodel=medium -fpic. */
15013 if (GET_CODE (x) != PLUS
15014 || GET_CODE (XEXP (x, 1)) != CONST)
15015 return ix86_delegitimize_tls_address (orig_x);
15017 if (ix86_pic_register_p (XEXP (x, 0)))
15018 /* %ebx + GOT/GOTOFF */
15020 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15022 /* %ebx + %reg * scale + GOT/GOTOFF */
15023 reg_addend = XEXP (x, 0);
15024 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
15025 reg_addend = XEXP (reg_addend, 1);
15026 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
15027 reg_addend = XEXP (reg_addend, 0);
15028 else
15030 reg_addend = NULL_RTX;
15031 addend = XEXP (x, 0);
15034 else
15035 addend = XEXP (x, 0);
15037 x = XEXP (XEXP (x, 1), 0);
15038 if (GET_CODE (x) == PLUS
15039 && CONST_INT_P (XEXP (x, 1)))
15041 const_addend = XEXP (x, 1);
15042 x = XEXP (x, 0);
15045 if (GET_CODE (x) == UNSPEC
15046 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
15047 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
15048 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
15049 && !MEM_P (orig_x) && !addend)))
15050 result = XVECEXP (x, 0, 0);
15052 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
15053 && !MEM_P (orig_x))
15054 result = XVECEXP (x, 0, 0);
15056 if (! result)
15057 return ix86_delegitimize_tls_address (orig_x);
15059 if (const_addend)
15060 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
15061 if (reg_addend)
15062 result = gen_rtx_PLUS (Pmode, reg_addend, result);
15063 if (addend)
15065 /* If the rest of original X doesn't involve the PIC register, add
15066 addend and subtract pic_offset_table_rtx. This can happen e.g.
15067 for code like:
15068 leal (%ebx, %ecx, 4), %ecx
15070 movl foo@GOTOFF(%ecx), %edx
15071 in which case we return (%ecx - %ebx) + foo
15072 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
15073 and reload has completed. */
15074 if (pic_offset_table_rtx
15075 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
15076 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
15077 pic_offset_table_rtx),
15078 result);
15079 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
15081 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
15082 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
15083 result = gen_rtx_PLUS (Pmode, tmp, result);
15085 else
15086 return orig_x;
15088 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
15090 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
15091 if (result == NULL_RTX)
15092 return orig_x;
15094 return result;
15097 /* If X is a machine specific address (i.e. a symbol or label being
15098 referenced as a displacement from the GOT implemented using an
15099 UNSPEC), then return the base term. Otherwise return X. */
15102 ix86_find_base_term (rtx x)
15104 rtx term;
15106 if (TARGET_64BIT)
15108 if (GET_CODE (x) != CONST)
15109 return x;
15110 term = XEXP (x, 0);
15111 if (GET_CODE (term) == PLUS
15112 && CONST_INT_P (XEXP (term, 1)))
15113 term = XEXP (term, 0);
15114 if (GET_CODE (term) != UNSPEC
15115 || (XINT (term, 1) != UNSPEC_GOTPCREL
15116 && XINT (term, 1) != UNSPEC_PCREL))
15117 return x;
15119 return XVECEXP (term, 0, 0);
15122 return ix86_delegitimize_address (x);
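/* Print to FILE the condition suffix ("e", "ne", "b", "ge", ...) for
comparison CODE in flags mode MODE.  REVERSE prints the reversed
condition; FP selects the alternate spellings some assemblers need
for fcmov and for unordered FP compares. */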
15125 static void
15126 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15127 bool fp, FILE *file)
15129 const char *suffix;
15131 if (mode == CCFPmode || mode == CCFPUmode)
15133 code = ix86_fp_compare_code_to_integer (code);
15134 mode = CCmode;
15136 if (reverse)
15137 code = reverse_condition (code);
15139 switch (code)
15141 case EQ:
15142 switch (mode)
15144 case CCAmode:
15145 suffix = "a";
15146 break;
15147 case CCCmode:
15148 suffix = "c";
15149 break;
15150 case CCOmode:
15151 suffix = "o";
15152 break;
15153 case CCPmode:
15154 suffix = "p";
15155 break;
15156 case CCSmode:
15157 suffix = "s";
15158 break;
15159 default:
15160 suffix = "e";
15161 break;
15163 break;
15164 case NE:
15165 switch (mode)
15167 case CCAmode:
15168 suffix = "na";
15169 break;
15170 case CCCmode:
15171 suffix = "nc";
15172 break;
15173 case CCOmode:
15174 suffix = "no";
15175 break;
15176 case CCPmode:
15177 suffix = "np";
15178 break;
15179 case CCSmode:
15180 suffix = "ns";
15181 break;
15182 default:
15183 suffix = "ne";
15184 break;
15186 break;
15187 case GT:
15188 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15189 suffix = "g";
15190 break;
15191 case GTU:
15192 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15193 Those same assemblers have the same but opposite lossage on cmov. */
15194 if (mode == CCmode)
15195 suffix = fp ? "nbe" : "a";
15196 else
15197 gcc_unreachable ();
15198 break;
15199 case LT:
15200 switch (mode)
15202 case CCNOmode:
15203 case CCGOCmode:
15204 suffix = "s";
15205 break;
15207 case CCmode:
15208 case CCGCmode:
15209 suffix = "l";
15210 break;
15212 default:
15213 gcc_unreachable ();
15215 break;
15216 case LTU:
15217 if (mode == CCmode)
15218 suffix = "b";
15219 else if (mode == CCCmode)
15220 suffix = fp ? "b" : "c";
15221 else
15222 gcc_unreachable ();
15223 break;
15224 case GE:
15225 switch (mode)
15227 case CCNOmode:
15228 case CCGOCmode:
15229 suffix = "ns";
15230 break;
15232 case CCmode:
15233 case CCGCmode:
15234 suffix = "ge";
15235 break;
15237 default:
15238 gcc_unreachable ();
15240 break;
15241 case GEU:
15242 if (mode == CCmode)
15243 suffix = "nb";
15244 else if (mode == CCCmode)
15245 suffix = fp ? "nb" : "nc";
15246 else
15247 gcc_unreachable ();
15248 break;
15249 case LE:
15250 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15251 suffix = "le";
15252 break;
15253 case LEU:
15254 if (mode == CCmode)
15255 suffix = "be";
15256 else
15257 gcc_unreachable ();
15258 break;
15259 case UNORDERED:
15260 suffix = fp ? "u" : "p";
15261 break;
15262 case ORDERED:
15263 suffix = fp ? "nu" : "np";
15264 break;
15265 default:
15266 gcc_unreachable ();
15268 fputs (suffix, file);
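/* For example, (gt (reg:CCGC flags) (const_int 0)) prints as "g",
giving "jg"/"setg"/"cmovg"; with REVERSE set it prints "le". */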
15271 /* Print the name of register X to FILE based on its machine mode and number.
15272 If CODE is 'w', pretend the mode is HImode.
15273 If CODE is 'b', pretend the mode is QImode.
15274 If CODE is 'k', pretend the mode is SImode.
15275 If CODE is 'q', pretend the mode is DImode.
15276 If CODE is 'x', pretend the mode is V4SFmode.
15277 If CODE is 't', pretend the mode is V8SFmode.
15278 If CODE is 'g', pretend the mode is V16SFmode.
15279 If CODE is 'h', pretend the reg is the 'high' byte register.
15280 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15281 If CODE is 'd', duplicate the operand for AVX instruction.
15284 void
15285 print_reg (rtx x, int code, FILE *file)
15287 const char *reg;
15288 int msize;
15289 unsigned int regno;
15290 bool duplicated;
15292 if (ASSEMBLER_DIALECT == ASM_ATT)
15293 putc ('%', file);
15295 if (x == pc_rtx)
15297 gcc_assert (TARGET_64BIT);
15298 fputs ("rip", file);
15299 return;
15302 if (code == 'y' && STACK_TOP_P (x))
15304 fputs ("st(0)", file);
15305 return;
15308 if (code == 'w')
15309 msize = 2;
15310 else if (code == 'b')
15311 msize = 1;
15312 else if (code == 'k')
15313 msize = 4;
15314 else if (code == 'q')
15315 msize = 8;
15316 else if (code == 'h')
15317 msize = 0;
15318 else if (code == 'x')
15319 msize = 16;
15320 else if (code == 't')
15321 msize = 32;
15322 else if (code == 'g')
15323 msize = 64;
15324 else
15325 msize = GET_MODE_SIZE (GET_MODE (x));
15327 regno = true_regnum (x);
15329 gcc_assert (regno != ARG_POINTER_REGNUM
15330 && regno != FRAME_POINTER_REGNUM
15331 && regno != FLAGS_REG
15332 && regno != FPSR_REG
15333 && regno != FPCR_REG);
15335 duplicated = code == 'd' && TARGET_AVX;
15337 switch (msize)
15339 case 8:
15340 case 4:
15341 if (LEGACY_INT_REGNO_P (regno))
15342 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
15343 case 16:
15344 case 12:
15345 case 2:
15346 normal:
15347 reg = hi_reg_name[regno];
15348 break;
15349 case 1:
15350 if (regno >= ARRAY_SIZE (qi_reg_name))
15351 goto normal;
15352 reg = qi_reg_name[regno];
15353 break;
15354 case 0:
15355 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15356 goto normal;
15357 reg = qi_high_reg_name[regno];
15358 break;
15359 case 32:
15360 case 64:
15361 if (SSE_REGNO_P (regno))
15363 gcc_assert (!duplicated);
15364 putc (msize == 32 ? 'y' : 'z', file);
15365 reg = hi_reg_name[regno] + 1;
15366 break;
15368 goto normal;
15369 default:
15370 gcc_unreachable ();
15373 fputs (reg, file);
15375 /* Irritatingly, AMD extended registers use
15376 different naming convention: "r%d[bwd]" */
15377 if (REX_INT_REGNO_P (regno))
15379 gcc_assert (TARGET_64BIT);
15380 switch (msize)
15382 case 0:
15383 error ("extended registers have no high halves");
15384 break;
15385 case 1:
15386 putc ('b', file);
15387 break;
15388 case 2:
15389 putc ('w', file);
15390 break;
15391 case 4:
15392 putc ('d', file);
15393 break;
15394 case 8:
15395 /* no suffix */
15396 break;
15397 default:
15398 error ("unsupported operand size for extended register");
15399 break;
15401 return;
15404 if (duplicated)
15406 if (ASSEMBLER_DIALECT == ASM_ATT)
15407 fprintf (file, ", %%%s", reg);
15408 else
15409 fprintf (file, ", %s", reg);
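/* E.g. (reg:SI ax) prints as "%eax" in AT&T syntax; with code 'w',
'b', 'h' or 'q' it prints "%ax", "%al", "%ah" or "%rax", and the
REX registers come out as "r8d"/"r8w"/"r8b"/"r8" via the suffix
logic above. */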
15413 /* Meaning of CODE:
15414 L,W,B,Q,S,T -- print the opcode suffix for the specified operand size.
15415 C -- print opcode suffix for set/cmov insn.
15416 c -- like C, but print reversed condition
15417 F,f -- likewise, but for floating-point.
15418 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15419 otherwise nothing
15420 R -- print embedded rounding and sae.
15421 r -- print only sae.
15422 z -- print the opcode suffix for the size of the current operand.
15423 Z -- likewise, with special suffixes for x87 instructions.
15424 * -- print a star (in certain assembler syntax)
15425 A -- print an absolute memory reference.
15426 E -- print address with DImode register names if TARGET_64BIT.
15427 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15428 s -- print a shift double count, followed by the assembler's argument
15429 delimiter.
15430 b -- print the QImode name of the register for the indicated operand.
15431 %b0 would print %al if operands[0] is reg 0.
15432 w -- likewise, print the HImode name of the register.
15433 k -- likewise, print the SImode name of the register.
15434 q -- likewise, print the DImode name of the register.
15435 x -- likewise, print the V4SFmode name of the register.
15436 t -- likewise, print the V8SFmode name of the register.
15437 g -- likewise, print the V16SFmode name of the register.
15438 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15439 y -- print "st(0)" instead of "st" as a register.
15440 d -- print duplicated register operand for AVX instruction.
15441 D -- print condition for SSE cmp instruction.
15442 P -- if PIC, print an @PLT suffix.
15443 p -- print raw symbol name.
15444 X -- don't print any sort of PIC '@' suffix for a symbol.
15445 & -- print some in-use local-dynamic symbol name.
15446 H -- print a memory address offset by 8; used for sse high-parts
15447 Y -- print condition for XOP pcom* instruction.
15448 + -- print a branch hint as 'cs' or 'ds' prefix
15449 ; -- print a semicolon (after prefixes due to a bug in older gas).
15450 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15451 @ -- print a segment register of thread base pointer load
15452 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15453 ! -- print MPX prefix for jxx/call/ret instructions if required.
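For example, a (hypothetical) insn template "mov%z0\t{%1, %0|%0, %1}"
uses %z0 to pick the b/w/l/q suffix for operand 0's mode, and the
{att|intel} alternation to pick the dialect-specific operand order.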
15456 void
15457 ix86_print_operand (FILE *file, rtx x, int code)
15459 if (code)
15461 switch (code)
15463 case 'A':
15464 switch (ASSEMBLER_DIALECT)
15466 case ASM_ATT:
15467 putc ('*', file);
15468 break;
15470 case ASM_INTEL:
15471 /* Intel syntax. For absolute addresses, registers should not
15472 be surrounded by braces. */
15473 if (!REG_P (x))
15475 putc ('[', file);
15476 ix86_print_operand (file, x, 0);
15477 putc (']', file);
15478 return;
15480 break;
15482 default:
15483 gcc_unreachable ();
15486 ix86_print_operand (file, x, 0);
15487 return;
15489 case 'E':
15490 /* Wrap address in an UNSPEC to declare special handling. */
15491 if (TARGET_64BIT)
15492 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15494 output_address (x);
15495 return;
15497 case 'L':
15498 if (ASSEMBLER_DIALECT == ASM_ATT)
15499 putc ('l', file);
15500 return;
15502 case 'W':
15503 if (ASSEMBLER_DIALECT == ASM_ATT)
15504 putc ('w', file);
15505 return;
15507 case 'B':
15508 if (ASSEMBLER_DIALECT == ASM_ATT)
15509 putc ('b', file);
15510 return;
15512 case 'Q':
15513 if (ASSEMBLER_DIALECT == ASM_ATT)
15514 putc ('l', file);
15515 return;
15517 case 'S':
15518 if (ASSEMBLER_DIALECT == ASM_ATT)
15519 putc ('s', file);
15520 return;
15522 case 'T':
15523 if (ASSEMBLER_DIALECT == ASM_ATT)
15524 putc ('t', file);
15525 return;
15527 case 'O':
15528 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15529 if (ASSEMBLER_DIALECT != ASM_ATT)
15530 return;
15532 switch (GET_MODE_SIZE (GET_MODE (x)))
15534 case 2:
15535 putc ('w', file);
15536 break;
15538 case 4:
15539 putc ('l', file);
15540 break;
15542 case 8:
15543 putc ('q', file);
15544 break;
15546 default:
15547 output_operand_lossage
15548 ("invalid operand size for operand code 'O'");
15549 return;
15552 putc ('.', file);
15553 #endif
15554 return;
15556 case 'z':
15557 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15559 /* Opcodes don't get size suffixes if using Intel opcodes. */
15560 if (ASSEMBLER_DIALECT == ASM_INTEL)
15561 return;
15563 switch (GET_MODE_SIZE (GET_MODE (x)))
15565 case 1:
15566 putc ('b', file);
15567 return;
15569 case 2:
15570 putc ('w', file);
15571 return;
15573 case 4:
15574 putc ('l', file);
15575 return;
15577 case 8:
15578 putc ('q', file);
15579 return;
15581 default:
15582 output_operand_lossage
15583 ("invalid operand size for operand code 'z'");
15584 return;
15588 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15589 warning
15590 (0, "non-integer operand used with operand code 'z'");
15591 /* FALLTHRU */
15593 case 'Z':
15594 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15595 if (ASSEMBLER_DIALECT == ASM_INTEL)
15596 return;
15598 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15600 switch (GET_MODE_SIZE (GET_MODE (x)))
15602 case 2:
15603 #ifdef HAVE_AS_IX86_FILDS
15604 putc ('s', file);
15605 #endif
15606 return;
15608 case 4:
15609 putc ('l', file);
15610 return;
15612 case 8:
15613 #ifdef HAVE_AS_IX86_FILDQ
15614 putc ('q', file);
15615 #else
15616 fputs ("ll", file);
15617 #endif
15618 return;
15620 default:
15621 break;
15624 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15626 /* 387 opcodes don't get size suffixes
15627 if the operands are registers. */
15628 if (STACK_REG_P (x))
15629 return;
15631 switch (GET_MODE_SIZE (GET_MODE (x)))
15633 case 4:
15634 putc ('s', file);
15635 return;
15637 case 8:
15638 putc ('l', file);
15639 return;
15641 case 12:
15642 case 16:
15643 putc ('t', file);
15644 return;
15646 default:
15647 break;
15650 else
15652 output_operand_lossage
15653 ("invalid operand type used with operand code 'Z'");
15654 return;
15657 output_operand_lossage
15658 ("invalid operand size for operand code 'Z'");
15659 return;
15661 case 'd':
15662 case 'b':
15663 case 'w':
15664 case 'k':
15665 case 'q':
15666 case 'h':
15667 case 't':
15668 case 'g':
15669 case 'y':
15670 case 'x':
15671 case 'X':
15672 case 'P':
15673 case 'p':
15674 break;
15676 case 's':
15677 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15679 ix86_print_operand (file, x, 0);
15680 fputs (", ", file);
15682 return;
15684 case 'Y':
15685 switch (GET_CODE (x))
15687 case NE:
15688 fputs ("neq", file);
15689 break;
15690 case EQ:
15691 fputs ("eq", file);
15692 break;
15693 case GE:
15694 case GEU:
15695 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15696 break;
15697 case GT:
15698 case GTU:
15699 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15700 break;
15701 case LE:
15702 case LEU:
15703 fputs ("le", file);
15704 break;
15705 case LT:
15706 case LTU:
15707 fputs ("lt", file);
15708 break;
15709 case UNORDERED:
15710 fputs ("unord", file);
15711 break;
15712 case ORDERED:
15713 fputs ("ord", file);
15714 break;
15715 case UNEQ:
15716 fputs ("ueq", file);
15717 break;
15718 case UNGE:
15719 fputs ("nlt", file);
15720 break;
15721 case UNGT:
15722 fputs ("nle", file);
15723 break;
15724 case UNLE:
15725 fputs ("ule", file);
15726 break;
15727 case UNLT:
15728 fputs ("ult", file);
15729 break;
15730 case LTGT:
15731 fputs ("une", file);
15732 break;
15733 default:
15734 output_operand_lossage ("operand is not a condition code, "
15735 "invalid operand code 'Y'");
15736 return;
15738 return;
15740 case 'D':
15741 /* A little bit of brain damage here. The SSE compare instructions
15742 use completely different names for the comparisons than the
15743 fp conditional moves do. */
15744 switch (GET_CODE (x))
15746 case UNEQ:
15747 if (TARGET_AVX)
15749 fputs ("eq_us", file);
15750 break;
15752 case EQ:
15753 fputs ("eq", file);
15754 break;
15755 case UNLT:
15756 if (TARGET_AVX)
15758 fputs ("nge", file);
15759 break;
15761 case LT:
15762 fputs ("lt", file);
15763 break;
15764 case UNLE:
15765 if (TARGET_AVX)
15767 fputs ("ngt", file);
15768 break;
15770 case LE:
15771 fputs ("le", file);
15772 break;
15773 case UNORDERED:
15774 fputs ("unord", file);
15775 break;
15776 case LTGT:
15777 if (TARGET_AVX)
15779 fputs ("neq_oq", file);
15780 break;
15782 case NE:
15783 fputs ("neq", file);
15784 break;
15785 case GE:
15786 if (TARGET_AVX)
15788 fputs ("ge", file);
15789 break;
15791 case UNGE:
15792 fputs ("nlt", file);
15793 break;
15794 case GT:
15795 if (TARGET_AVX)
15797 fputs ("gt", file);
15798 break;
15800 case UNGT:
15801 fputs ("nle", file);
15802 break;
15803 case ORDERED:
15804 fputs ("ord", file);
15805 break;
15806 default:
15807 output_operand_lossage ("operand is not a condition code, "
15808 "invalid operand code 'D'");
15809 return;
15811 return;
15813 case 'F':
15814 case 'f':
15815 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15816 if (ASSEMBLER_DIALECT == ASM_ATT)
15817 putc ('.', file);
15818 #endif
15820 case 'C':
15821 case 'c':
15822 if (!COMPARISON_P (x))
15824 output_operand_lossage ("operand is not a condition code, "
15825 "invalid operand code '%c'", code);
15826 return;
15828 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15829 code == 'c' || code == 'f',
15830 code == 'F' || code == 'f',
15831 file);
15832 return;
15834 case 'H':
15835 if (!offsettable_memref_p (x))
15837 output_operand_lossage ("operand is not an offsettable memory "
15838 "reference, invalid operand code 'H'");
15839 return;
15841 /* It doesn't actually matter what mode we use here, as we're
15842 only going to use this for printing. */
15843 x = adjust_address_nv (x, DImode, 8);
15844 /* Output 'qword ptr' for intel assembler dialect. */
15845 if (ASSEMBLER_DIALECT == ASM_INTEL)
15846 code = 'q';
15847 break;
15849 case 'K':
15850 gcc_assert (CONST_INT_P (x));
15852 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15853 #ifdef HAVE_AS_IX86_HLE
15854 fputs ("xacquire ", file);
15855 #else
15856 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15857 #endif
15858 else if (INTVAL (x) & IX86_HLE_RELEASE)
15859 #ifdef HAVE_AS_IX86_HLE
15860 fputs ("xrelease ", file);
15861 #else
15862 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15863 #endif
15864 /* We do not want to print the value of the operand. */
15865 return;
15867 case 'N':
15868 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15869 fputs ("{z}", file);
15870 return;
15872 case 'r':
15873 gcc_assert (CONST_INT_P (x));
15874 gcc_assert (INTVAL (x) == ROUND_SAE);
15876 if (ASSEMBLER_DIALECT == ASM_INTEL)
15877 fputs (", ", file);
15879 fputs ("{sae}", file);
15881 if (ASSEMBLER_DIALECT == ASM_ATT)
15882 fputs (", ", file);
15884 return;
15886 case 'R':
15887 gcc_assert (CONST_INT_P (x));
15889 if (ASSEMBLER_DIALECT == ASM_INTEL)
15890 fputs (", ", file);
15892 switch (INTVAL (x))
15894 case ROUND_NEAREST_INT | ROUND_SAE:
15895 fputs ("{rn-sae}", file);
15896 break;
15897 case ROUND_NEG_INF | ROUND_SAE:
15898 fputs ("{rd-sae}", file);
15899 break;
15900 case ROUND_POS_INF | ROUND_SAE:
15901 fputs ("{ru-sae}", file);
15902 break;
15903 case ROUND_ZERO | ROUND_SAE:
15904 fputs ("{rz-sae}", file);
15905 break;
15906 default:
15907 gcc_unreachable ();
15910 if (ASSEMBLER_DIALECT == ASM_ATT)
15911 fputs (", ", file);
15913 return;
15915 case '*':
15916 if (ASSEMBLER_DIALECT == ASM_ATT)
15917 putc ('*', file);
15918 return;
15920 case '&':
15922 const char *name = get_some_local_dynamic_name ();
15923 if (name == NULL)
15924 output_operand_lossage ("'%%&' used without any "
15925 "local dynamic TLS references");
15926 else
15927 assemble_name (file, name);
15928 return;
15931 case '+':
15933 rtx x;
15935 if (!optimize
15936 || optimize_function_for_size_p (cfun)
15937 || !TARGET_BRANCH_PREDICTION_HINTS)
15938 return;
15940 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15941 if (x)
15943 int pred_val = XINT (x, 0);
15945 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15946 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15948 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15949 bool cputaken
15950 = final_forward_branch_p (current_output_insn) == 0;
15952 /* Emit hints only in cases where the default branch prediction
15953 heuristics would fail. */
15954 if (taken != cputaken)
15956 /* We use 3e (DS) prefix for taken branches and
15957 2e (CS) prefix for not taken branches. */
15958 if (taken)
15959 fputs ("ds ; ", file);
15960 else
15961 fputs ("cs ; ", file);
15965 return;
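/* Concretely: the CPU statically predicts backward branches taken
and forward branches not taken, so e.g. a forward jcc that profile
data says is taken is printed as "ds ; jne .L3", i.e. with a 0x3e
prefix byte. */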
15968 case ';':
15969 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15970 putc (';', file);
15971 #endif
15972 return;
15974 case '@':
15975 if (ASSEMBLER_DIALECT == ASM_ATT)
15976 putc ('%', file);
15978 /* The kernel uses a different segment register for performance
15979 reasons; that way a system call need not trash the userspace
15980 segment register, which would be expensive. */
15981 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15982 fputs ("fs", file);
15983 else
15984 fputs ("gs", file);
15985 return;
15987 case '~':
15988 putc (TARGET_AVX2 ? 'i' : 'f', file);
15989 return;
15991 case '^':
15992 if (TARGET_64BIT && Pmode != word_mode)
15993 fputs ("addr32 ", file);
15994 return;
15996 case '!':
15997 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15998 fputs ("bnd ", file);
15999 return;
16001 default:
16002 output_operand_lossage ("invalid operand code '%c'", code);
16006 if (REG_P (x))
16007 print_reg (x, code, file);
16009 else if (MEM_P (x))
16011 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
16012 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
16013 && GET_MODE (x) != BLKmode)
16015 const char * size;
16016 switch (GET_MODE_SIZE (GET_MODE (x)))
16018 case 1: size = "BYTE"; break;
16019 case 2: size = "WORD"; break;
16020 case 4: size = "DWORD"; break;
16021 case 8: size = "QWORD"; break;
16022 case 12: size = "TBYTE"; break;
16023 case 16:
16024 if (GET_MODE (x) == XFmode)
16025 size = "TBYTE";
16026 else
16027 size = "XMMWORD";
16028 break;
16029 case 32: size = "YMMWORD"; break;
16030 case 64: size = "ZMMWORD"; break;
16031 default:
16032 gcc_unreachable ();
16035 /* Check for explicit size override (codes 'b', 'w', 'k',
16036 'q' and 'x') */
16037 if (code == 'b')
16038 size = "BYTE";
16039 else if (code == 'w')
16040 size = "WORD";
16041 else if (code == 'k')
16042 size = "DWORD";
16043 else if (code == 'q')
16044 size = "QWORD";
16045 else if (code == 'x')
16046 size = "XMMWORD";
16048 fputs (size, file);
16049 fputs (" PTR ", file);
16052 x = XEXP (x, 0);
16053 /* Avoid (%rip) for call operands. */
16054 if (CONSTANT_ADDRESS_P (x) && code == 'P'
16055 && !CONST_INT_P (x))
16056 output_addr_const (file, x);
16057 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
16058 output_operand_lossage ("invalid constraints for operand");
16059 else
16060 output_address (x);
16063 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
16065 REAL_VALUE_TYPE r;
16066 long l;
16068 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16069 REAL_VALUE_TO_TARGET_SINGLE (r, l);
16071 if (ASSEMBLER_DIALECT == ASM_ATT)
16072 putc ('$', file);
16073 /* Sign extend 32bit SFmode immediate to 8 bytes. */
16074 if (code == 'q')
16075 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
16076 (unsigned long long) (int) l);
16077 else
16078 fprintf (file, "0x%08x", (unsigned int) l);
16081 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
16083 REAL_VALUE_TYPE r;
16084 long l[2];
16086 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16087 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16089 if (ASSEMBLER_DIALECT == ASM_ATT)
16090 putc ('$', file);
16091 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16094 /* These float cases don't actually occur as immediate operands. */
16095 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
16097 char dstr[30];
16099 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16100 fputs (dstr, file);
16103 else
16105 /* We have patterns that allow zero sets of memory, for instance.
16106 In 64-bit mode, we should probably support all 8-byte vectors,
16107 since we can in fact encode that into an immediate. */
16108 if (GET_CODE (x) == CONST_VECTOR)
16110 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16111 x = const0_rtx;
16114 if (code != 'P' && code != 'p')
16116 if (CONST_INT_P (x))
16118 if (ASSEMBLER_DIALECT == ASM_ATT)
16119 putc ('$', file);
16121 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16122 || GET_CODE (x) == LABEL_REF)
16124 if (ASSEMBLER_DIALECT == ASM_ATT)
16125 putc ('$', file);
16126 else
16127 fputs ("OFFSET FLAT:", file);
16130 if (CONST_INT_P (x))
16131 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16132 else if (flag_pic || MACHOPIC_INDIRECT)
16133 output_pic_addr_const (file, x, code);
16134 else
16135 output_addr_const (file, x);
16139 static bool
16140 ix86_print_operand_punct_valid_p (unsigned char code)
16142 return (code == '@' || code == '*' || code == '+' || code == '&'
16143 || code == ';' || code == '~' || code == '^' || code == '!');
16146 /* Print a memory operand whose address is ADDR. */
16148 static void
16149 ix86_print_operand_address (FILE *file, rtx addr)
16151 struct ix86_address parts;
16152 rtx base, index, disp;
16153 int scale;
16154 int ok;
16155 bool vsib = false;
16156 int code = 0;
16158 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16160 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16161 gcc_assert (parts.index == NULL_RTX);
16162 parts.index = XVECEXP (addr, 0, 1);
16163 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16164 addr = XVECEXP (addr, 0, 0);
16165 vsib = true;
16167 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16169 gcc_assert (TARGET_64BIT);
16170 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16171 code = 'q';
16173 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16175 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16176 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16177 if (parts.base != NULL_RTX)
16179 parts.index = parts.base;
16180 parts.scale = 1;
16182 parts.base = XVECEXP (addr, 0, 0);
16183 addr = XVECEXP (addr, 0, 0);
16185 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16187 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16188 gcc_assert (parts.index == NULL_RTX);
16189 parts.index = XVECEXP (addr, 0, 1);
16190 addr = XVECEXP (addr, 0, 0);
16192 else
16193 ok = ix86_decompose_address (addr, &parts);
16195 gcc_assert (ok);
16197 base = parts.base;
16198 index = parts.index;
16199 disp = parts.disp;
16200 scale = parts.scale;
16202 switch (parts.seg)
16204 case SEG_DEFAULT:
16205 break;
16206 case SEG_FS:
16207 case SEG_GS:
16208 if (ASSEMBLER_DIALECT == ASM_ATT)
16209 putc ('%', file);
16210 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16211 break;
16212 default:
16213 gcc_unreachable ();
16216 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16217 if (TARGET_64BIT && !base && !index)
16219 rtx symbol = disp;
16221 if (GET_CODE (disp) == CONST
16222 && GET_CODE (XEXP (disp, 0)) == PLUS
16223 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16224 symbol = XEXP (XEXP (disp, 0), 0);
16226 if (GET_CODE (symbol) == LABEL_REF
16227 || (GET_CODE (symbol) == SYMBOL_REF
16228 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16229 base = pc_rtx;
16231 if (!base && !index)
16233 /* Displacement only requires special attention. */
16235 if (CONST_INT_P (disp))
16237 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16238 fputs ("ds:", file);
16239 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16241 else if (flag_pic)
16242 output_pic_addr_const (file, disp, 0);
16243 else
16244 output_addr_const (file, disp);
16246 else
16248 /* Print SImode register names to force addr32 prefix. */
16249 if (SImode_address_operand (addr, VOIDmode))
16251 #ifdef ENABLE_CHECKING
16252 gcc_assert (TARGET_64BIT);
16253 switch (GET_CODE (addr))
16255 case SUBREG:
16256 gcc_assert (GET_MODE (addr) == SImode);
16257 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16258 break;
16259 case ZERO_EXTEND:
16260 case AND:
16261 gcc_assert (GET_MODE (addr) == DImode);
16262 break;
16263 default:
16264 gcc_unreachable ();
16266 #endif
16267 gcc_assert (!code);
16268 code = 'k';
16270 else if (code == 0
16271 && TARGET_X32
16272 && disp
16273 && CONST_INT_P (disp)
16274 && INTVAL (disp) < -16*1024*1024)
16276 /* X32 runs in 64-bit mode, where displacement, DISP, in
16277 address DISP(%r64), is encoded as 32-bit immediate sign-
16278 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16279 address is %r64 + 0xffffffffbffffd00. When %r64 <
16280 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16281 which is invalid for x32. The correct address is %r64
16282 - 0x40000300 == 0xf7ffdd64. To properly encode
16283 -0x40000300(%r64) for x32, we zero-extend negative
16284 displacement by forcing addr32 prefix which truncates
16285 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16286 zero-extend all negative displacements, including -1(%rsp).
16287 However, for small negative displacements, sign-extension
16288 won't cause overflow. We only zero-extend negative
16289 displacements if they < -16*1024*1024, which is also used
16290 to check legitimate address displacements for PIC. */
16291 code = 'k';
16294 if (ASSEMBLER_DIALECT == ASM_ATT)
16296 if (disp)
16298 if (flag_pic)
16299 output_pic_addr_const (file, disp, 0);
16300 else if (GET_CODE (disp) == LABEL_REF)
16301 output_asm_label (disp);
16302 else
16303 output_addr_const (file, disp);
16306 putc ('(', file);
16307 if (base)
16308 print_reg (base, code, file);
16309 if (index)
16311 putc (',', file);
16312 print_reg (index, vsib ? 0 : code, file);
16313 if (scale != 1 || vsib)
16314 fprintf (file, ",%d", scale);
16316 putc (')', file);
16318 else
16320 rtx offset = NULL_RTX;
16322 if (disp)
16324 /* Pull out the offset of a symbol; print any symbol itself. */
16325 if (GET_CODE (disp) == CONST
16326 && GET_CODE (XEXP (disp, 0)) == PLUS
16327 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16329 offset = XEXP (XEXP (disp, 0), 1);
16330 disp = gen_rtx_CONST (VOIDmode,
16331 XEXP (XEXP (disp, 0), 0));
16334 if (flag_pic)
16335 output_pic_addr_const (file, disp, 0);
16336 else if (GET_CODE (disp) == LABEL_REF)
16337 output_asm_label (disp);
16338 else if (CONST_INT_P (disp))
16339 offset = disp;
16340 else
16341 output_addr_const (file, disp);
16344 putc ('[', file);
16345 if (base)
16347 print_reg (base, code, file);
16348 if (offset)
16350 if (INTVAL (offset) >= 0)
16351 putc ('+', file);
16352 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16355 else if (offset)
16356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16357 else
16358 putc ('0', file);
16360 if (index)
16362 putc ('+', file);
16363 print_reg (index, vsib ? 0 : code, file);
16364 if (scale != 1 || vsib)
16365 fprintf (file, "*%d", scale);
16367 putc (']', file);
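/* The two dialect branches above render the same address quite
differently: base %rbp, index %rax, scale 4, displacement -8 is
printed as "-8(%rbp,%rax,4)" in AT&T syntax but as
"[rbp-8+rax*4]" in Intel syntax. */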
16372 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16374 static bool
16375 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16377 rtx op;
16379 if (GET_CODE (x) != UNSPEC)
16380 return false;
16382 op = XVECEXP (x, 0, 0);
16383 switch (XINT (x, 1))
16385 case UNSPEC_GOTTPOFF:
16386 output_addr_const (file, op);
16387 /* FIXME: This might be @TPOFF in Sun ld. */
16388 fputs ("@gottpoff", file);
16389 break;
16390 case UNSPEC_TPOFF:
16391 output_addr_const (file, op);
16392 fputs ("@tpoff", file);
16393 break;
16394 case UNSPEC_NTPOFF:
16395 output_addr_const (file, op);
16396 if (TARGET_64BIT)
16397 fputs ("@tpoff", file);
16398 else
16399 fputs ("@ntpoff", file);
16400 break;
16401 case UNSPEC_DTPOFF:
16402 output_addr_const (file, op);
16403 fputs ("@dtpoff", file);
16404 break;
16405 case UNSPEC_GOTNTPOFF:
16406 output_addr_const (file, op);
16407 if (TARGET_64BIT)
16408 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16409 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16410 else
16411 fputs ("@gotntpoff", file);
16412 break;
16413 case UNSPEC_INDNTPOFF:
16414 output_addr_const (file, op);
16415 fputs ("@indntpoff", file);
16416 break;
16417 #if TARGET_MACHO
16418 case UNSPEC_MACHOPIC_OFFSET:
16419 output_addr_const (file, op);
16420 putc ('-', file);
16421 machopic_output_function_base_name (file);
16422 break;
16423 #endif
16425 case UNSPEC_STACK_CHECK:
16427 int offset;
16429 gcc_assert (flag_split_stack);
16431 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16432 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16433 #else
16434 gcc_unreachable ();
16435 #endif
16437 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16439 break;
16441 default:
16442 return false;
16445 return true;
16448 /* Split one or more double-mode RTL references into pairs of half-mode
16449 references. The RTL can be REG, offsettable MEM, integer constant, or
16450 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16451 split and "num" is its length. lo_half and hi_half are output arrays
16452 that parallel "operands". */
16454 void
16455 split_double_mode (machine_mode mode, rtx operands[],
16456 int num, rtx lo_half[], rtx hi_half[])
16458 machine_mode half_mode;
16459 unsigned int byte;
16461 switch (mode)
16463 case TImode:
16464 half_mode = DImode;
16465 break;
16466 case DImode:
16467 half_mode = SImode;
16468 break;
16469 default:
16470 gcc_unreachable ();
16473 byte = GET_MODE_SIZE (half_mode);
16475 while (num--)
16477 rtx op = operands[num];
16479 /* simplify_subreg refuses to split volatile memory addresses,
16480 but we still have to handle them. */
16481 if (MEM_P (op))
16483 lo_half[num] = adjust_address (op, half_mode, 0);
16484 hi_half[num] = adjust_address (op, half_mode, byte);
16486 else
16488 lo_half[num] = simplify_gen_subreg (half_mode, op,
16489 GET_MODE (op) == VOIDmode
16490 ? mode : GET_MODE (op), 0);
16491 hi_half[num] = simplify_gen_subreg (half_mode, op,
16492 GET_MODE (op) == VOIDmode
16493 ? mode : GET_MODE (op), byte);
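/* Worked example (illustrative): splitting the DImode constant
   0x0000000100000002 gives lo_half == (const_int 2) and
   hi_half == (const_int 1); for an offsettable DImode MEM, lo_half is
   the SImode MEM at offset 0 and hi_half the SImode MEM at offset 4. */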
16498 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16499 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16500 is the expression of the binary operation. The output may either be
16501 emitted here, or returned to the caller, like all output_* functions.
16503 There is no guarantee that the operands are the same mode, as they
16504 might be within FLOAT or FLOAT_EXTEND expressions. */
16506 #ifndef SYSV386_COMPAT
16507 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16508 wants to fix the assemblers because that causes incompatibility
16509 with gcc. No-one wants to fix gcc because that causes
16510 incompatibility with assemblers... You can pass
16511 -DSYSV386_COMPAT=0 if you rebuild both gcc and gas this way. */
16512 #define SYSV386_COMPAT 1
16513 #endif
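/* A note on the output templates below (a reading aid, nothing new):
   the "{att|intel}" braces select between assembler dialects according
   to ASSEMBLER_DIALECT. For example, "vaddss\t{%2, %1, %0|%0, %1, %2}"
   prints as "vaddss %xmm2, %xmm1, %xmm0" under ASM_ATT and as
   "vaddss xmm0, xmm1, xmm2" under ASM_INTEL. */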
16515 const char *
16516 output_387_binary_op (rtx insn, rtx *operands)
16518 static char buf[40];
16519 const char *p;
16520 const char *ssep;
16521 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16523 #ifdef ENABLE_CHECKING
16524 /* Even if we do not want to check the inputs, this documents input
16525 constraints, which helps in understanding the following code. */
16526 if (STACK_REG_P (operands[0])
16527 && ((REG_P (operands[1])
16528 && REGNO (operands[0]) == REGNO (operands[1])
16529 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16530 || (REG_P (operands[2])
16531 && REGNO (operands[0]) == REGNO (operands[2])
16532 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16533 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16534 ; /* ok */
16535 else
16536 gcc_assert (is_sse);
16537 #endif
16539 switch (GET_CODE (operands[3]))
16541 case PLUS:
16542 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16543 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16544 p = "fiadd";
16545 else
16546 p = "fadd";
16547 ssep = "vadd";
16548 break;
16550 case MINUS:
16551 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16552 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16553 p = "fisub";
16554 else
16555 p = "fsub";
16556 ssep = "vsub";
16557 break;
16559 case MULT:
16560 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16561 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16562 p = "fimul";
16563 else
16564 p = "fmul";
16565 ssep = "vmul";
16566 break;
16568 case DIV:
16569 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16570 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16571 p = "fidiv";
16572 else
16573 p = "fdiv";
16574 ssep = "vdiv";
16575 break;
16577 default:
16578 gcc_unreachable ();
16581 if (is_sse)
16583 if (TARGET_AVX)
16585 strcpy (buf, ssep);
16586 if (GET_MODE (operands[0]) == SFmode)
16587 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16588 else
16589 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16591 else
16593 strcpy (buf, ssep + 1);
16594 if (GET_MODE (operands[0]) == SFmode)
16595 strcat (buf, "ss\t{%2, %0|%0, %2}");
16596 else
16597 strcat (buf, "sd\t{%2, %0|%0, %2}");
16599 return buf;
16601 strcpy (buf, p);
16603 switch (GET_CODE (operands[3]))
16605 case MULT:
16606 case PLUS:
16607 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16608 std::swap (operands[1], operands[2]);
16610 /* We now know operands[0] == operands[1]. */
16612 if (MEM_P (operands[2]))
16614 p = "%Z2\t%2";
16615 break;
16618 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16620 if (STACK_TOP_P (operands[0]))
16621 /* How is it that we are storing to a dead operand[2]?
16622 Well, presumably operands[1] is dead too. We can't
16623 store the result to st(0) as st(0) gets popped on this
16624 instruction. Instead store to operands[2] (which I
16625 think has to be st(1)). st(1) will be popped later.
16626 gcc <= 2.8.1 didn't have this check and generated
16627 assembly code that the Unixware assembler rejected. */
16628 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16629 else
16630 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16631 break;
16634 if (STACK_TOP_P (operands[0]))
16635 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16636 else
16637 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16638 break;
16640 case MINUS:
16641 case DIV:
16642 if (MEM_P (operands[1]))
16644 p = "r%Z1\t%1";
16645 break;
16648 if (MEM_P (operands[2]))
16650 p = "%Z2\t%2";
16651 break;
16654 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16656 #if SYSV386_COMPAT
16657 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16658 derived assemblers, confusingly reverse the direction of
16659 the operation for fsub{r} and fdiv{r} when the
16660 destination register is not st(0). The Intel assembler
16661 doesn't have this brain damage. Read !SYSV386_COMPAT to
16662 figure out what the hardware really does. */
16663 if (STACK_TOP_P (operands[0]))
16664 p = "{p\t%0, %2|rp\t%2, %0}";
16665 else
16666 p = "{rp\t%2, %0|p\t%0, %2}";
16667 #else
16668 if (STACK_TOP_P (operands[0]))
16669 /* As above for fmul/fadd, we can't store to st(0). */
16670 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16671 else
16672 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16673 #endif
16674 break;
16677 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16679 #if SYSV386_COMPAT
16680 if (STACK_TOP_P (operands[0]))
16681 p = "{rp\t%0, %1|p\t%1, %0}";
16682 else
16683 p = "{p\t%1, %0|rp\t%0, %1}";
16684 #else
16685 if (STACK_TOP_P (operands[0]))
16686 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16687 else
16688 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16689 #endif
16690 break;
16693 if (STACK_TOP_P (operands[0]))
16695 if (STACK_TOP_P (operands[1]))
16696 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16697 else
16698 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16699 break;
16701 else if (STACK_TOP_P (operands[1]))
16703 #if SYSV386_COMPAT
16704 p = "{\t%1, %0|r\t%0, %1}";
16705 #else
16706 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16707 #endif
16709 else
16711 #if SYSV386_COMPAT
16712 p = "{r\t%2, %0|\t%0, %2}";
16713 #else
16714 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16715 #endif
16717 break;
16719 default:
16720 gcc_unreachable ();
16723 strcat (buf, p);
16724 return buf;
16727 /* Check if a 256bit AVX register is referenced inside of EXP. */
16729 static bool
16730 ix86_check_avx256_register (const_rtx exp)
16732 if (SUBREG_P (exp))
16733 exp = SUBREG_REG (exp);
16735 return (REG_P (exp)
16736 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16739 /* Return needed mode for entity in optimize_mode_switching pass. */
16741 static int
16742 ix86_avx_u128_mode_needed (rtx_insn *insn)
16744 if (CALL_P (insn))
16746 rtx link;
16748 /* Needed mode is set to AVX_U128_CLEAN if there are
16749 no 256bit modes used in function arguments. */
16750 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16751 link;
16752 link = XEXP (link, 1))
16754 if (GET_CODE (XEXP (link, 0)) == USE)
16756 rtx arg = XEXP (XEXP (link, 0), 0);
16758 if (ix86_check_avx256_register (arg))
16759 return AVX_U128_DIRTY;
16763 return AVX_U128_CLEAN;
16766 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16767 changes state only when a 256bit register is written to, but we need
16768 to prevent the compiler from moving the optimal insertion point above
16769 an eventual read from a 256bit register. */
16770 subrtx_iterator::array_type array;
16771 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16772 if (ix86_check_avx256_register (*iter))
16773 return AVX_U128_DIRTY;
16775 return AVX_U128_ANY;
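/* Sketch of why DIRTY matters (illustrative asm only):

     vaddps %ymm1, %ymm0, %ymm0   # 256bit write: upper state DIRTY
     vzeroupper                   # inserted on a DIRTY->CLEAN switch
     addss  %xmm2, %xmm3          # legacy SSE insn

   Without the vzeroupper, the legacy SSE insn would pay the AVX-SSE
   transition penalty on some microarchitectures. */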
16778 /* Return mode that i387 must be switched into
16779 prior to the execution of insn. */
16781 static int
16782 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16784 enum attr_i387_cw mode;
16786 /* The mode UNINITIALIZED is used to store the control word after a
16787 function call or ASM pattern. The mode ANY specifies that the insn
16788 has no requirements on the control word and makes no changes to the
16789 bits we are interested in. */
16791 if (CALL_P (insn)
16792 || (NONJUMP_INSN_P (insn)
16793 && (asm_noperands (PATTERN (insn)) >= 0
16794 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16795 return I387_CW_UNINITIALIZED;
16797 if (recog_memoized (insn) < 0)
16798 return I387_CW_ANY;
16800 mode = get_attr_i387_cw (insn);
16802 switch (entity)
16804 case I387_TRUNC:
16805 if (mode == I387_CW_TRUNC)
16806 return mode;
16807 break;
16809 case I387_FLOOR:
16810 if (mode == I387_CW_FLOOR)
16811 return mode;
16812 break;
16814 case I387_CEIL:
16815 if (mode == I387_CW_CEIL)
16816 return mode;
16817 break;
16819 case I387_MASK_PM:
16820 if (mode == I387_CW_MASK_PM)
16821 return mode;
16822 break;
16824 default:
16825 gcc_unreachable ();
16828 return I387_CW_ANY;
16831 /* Return mode that entity must be switched into
16832 prior to the execution of insn. */
16834 static int
16835 ix86_mode_needed (int entity, rtx_insn *insn)
16837 switch (entity)
16839 case AVX_U128:
16840 return ix86_avx_u128_mode_needed (insn);
16841 case I387_TRUNC:
16842 case I387_FLOOR:
16843 case I387_CEIL:
16844 case I387_MASK_PM:
16845 return ix86_i387_mode_needed (entity, insn);
16846 default:
16847 gcc_unreachable ();
16849 return 0;
16852 /* Check if a 256bit AVX register is referenced in stores. */
16854 static void
16855 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16857 if (ix86_check_avx256_register (dest))
16859 bool *used = (bool *) data;
16860 *used = true;
16864 /* Calculate mode of upper 128bit AVX registers after the insn. */
16866 static int
16867 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16869 rtx pat = PATTERN (insn);
16871 if (vzeroupper_operation (pat, VOIDmode)
16872 || vzeroall_operation (pat, VOIDmode))
16873 return AVX_U128_CLEAN;
16875 /* We know that state is clean after CALL insn if there are no
16876 256bit registers used in the function return register. */
16877 if (CALL_P (insn))
16879 bool avx_reg256_found = false;
16880 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16882 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16885 /* Otherwise, return current mode. Remember that if insn
16886 references AVX 256bit registers, the mode was already changed
16887 to DIRTY from MODE_NEEDED. */
16888 return mode;
16891 /* Return the mode that an insn results in. */
16893 static int
16894 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16896 switch (entity)
16898 case AVX_U128:
16899 return ix86_avx_u128_mode_after (mode, insn);
16900 case I387_TRUNC:
16901 case I387_FLOOR:
16902 case I387_CEIL:
16903 case I387_MASK_PM:
16904 return mode;
16905 default:
16906 gcc_unreachable ();
16910 static int
16911 ix86_avx_u128_mode_entry (void)
16913 tree arg;
16915 /* Entry mode is set to AVX_U128_DIRTY if there are
16916 256bit modes used in function arguments. */
16917 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16918 arg = TREE_CHAIN (arg))
16920 rtx incoming = DECL_INCOMING_RTL (arg);
16922 if (incoming && ix86_check_avx256_register (incoming))
16923 return AVX_U128_DIRTY;
16926 return AVX_U128_CLEAN;
16929 /* Return a mode that ENTITY is assumed to be
16930 switched to at function entry. */
16932 static int
16933 ix86_mode_entry (int entity)
16935 switch (entity)
16937 case AVX_U128:
16938 return ix86_avx_u128_mode_entry ();
16939 case I387_TRUNC:
16940 case I387_FLOOR:
16941 case I387_CEIL:
16942 case I387_MASK_PM:
16943 return I387_CW_ANY;
16944 default:
16945 gcc_unreachable ();
16949 static int
16950 ix86_avx_u128_mode_exit (void)
16952 rtx reg = crtl->return_rtx;
16954 /* Exit mode is set to AVX_U128_DIRTY if there are
16955 256bit modes used in the function return register. */
16956 if (reg && ix86_check_avx256_register (reg))
16957 return AVX_U128_DIRTY;
16959 return AVX_U128_CLEAN;
16962 /* Return a mode that ENTITY is assumed to be
16963 switched to at function exit. */
16965 static int
16966 ix86_mode_exit (int entity)
16968 switch (entity)
16970 case AVX_U128:
16971 return ix86_avx_u128_mode_exit ();
16972 case I387_TRUNC:
16973 case I387_FLOOR:
16974 case I387_CEIL:
16975 case I387_MASK_PM:
16976 return I387_CW_ANY;
16977 default:
16978 gcc_unreachable ();
16982 static int
16983 ix86_mode_priority (int, int n)
16985 return n;
16988 /* Output code to initialize control word copies used by trunc?f?i and
16989 rounding patterns. The current control word is saved to a stack
16990 slot, then a copy adjusted for MODE is stored to another slot. */
16992 static void
16993 emit_i387_cw_initialization (int mode)
16995 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16996 rtx new_mode;
16998 enum ix86_stack_slot slot;
17000 rtx reg = gen_reg_rtx (HImode);
17002 emit_insn (gen_x86_fnstcw_1 (stored_mode));
17003 emit_move_insn (reg, copy_rtx (stored_mode));
17005 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
17006 || optimize_insn_for_size_p ())
17008 switch (mode)
17010 case I387_CW_TRUNC:
17011 /* round toward zero (truncate) */
17012 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
17013 slot = SLOT_CW_TRUNC;
17014 break;
17016 case I387_CW_FLOOR:
17017 /* round down toward -oo */
17018 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17019 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
17020 slot = SLOT_CW_FLOOR;
17021 break;
17023 case I387_CW_CEIL:
17024 /* round up toward +oo */
17025 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17026 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
17027 slot = SLOT_CW_CEIL;
17028 break;
17030 case I387_CW_MASK_PM:
17031 /* mask precision exception for nearbyint() */
17032 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17033 slot = SLOT_CW_MASK_PM;
17034 break;
17036 default:
17037 gcc_unreachable ();
17040 else
17042 switch (mode)
17044 case I387_CW_TRUNC:
17045 /* round toward zero (truncate) */
17046 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
17047 slot = SLOT_CW_TRUNC;
17048 break;
17050 case I387_CW_FLOOR:
17051 /* round down toward -oo */
17052 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
17053 slot = SLOT_CW_FLOOR;
17054 break;
17056 case I387_CW_CEIL:
17057 /* round up toward +oo */
17058 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
17059 slot = SLOT_CW_CEIL;
17060 break;
17062 case I387_CW_MASK_PM:
17063 /* mask precision exception for nearbyint() */
17064 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17065 slot = SLOT_CW_MASK_PM;
17066 break;
17068 default:
17069 gcc_unreachable ();
17073 gcc_assert (slot < MAX_386_STACK_LOCALS);
17075 new_mode = assign_386_stack_local (HImode, slot);
17076 emit_move_insn (new_mode, reg);
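/* Architectural background for the constants above: bits 11:10 of the
   x87 control word are the rounding-control field RC (00 = to nearest,
   01 = down, 10 = up, 11 = toward zero), and bit 5 is PM, the
   precision exception mask. So OR-ing in 0x0c00 selects truncation,
   while AND with ~0x0c00 followed by OR with 0x0400 selects rounding
   down, as used for I387_CW_FLOOR. */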
17079 /* Emit vzeroupper. */
17081 void
17082 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17084 int i;
17086 /* Cancel automatic vzeroupper insertion if there are
17087 live call-saved SSE registers at the insertion point. */
17089 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17090 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17091 return;
17093 if (TARGET_64BIT)
17094 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17095 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17096 return;
17098 emit_insn (gen_avx_vzeroupper ());
17103 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
17104 is the set of hard registers live at the point where the insn(s)
17105 are to be inserted. */
17107 static void
17108 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17109 HARD_REG_SET regs_live)
17111 switch (entity)
17113 case AVX_U128:
17114 if (mode == AVX_U128_CLEAN)
17115 ix86_avx_emit_vzeroupper (regs_live);
17116 break;
17117 case I387_TRUNC:
17118 case I387_FLOOR:
17119 case I387_CEIL:
17120 case I387_MASK_PM:
17121 if (mode != I387_CW_ANY
17122 && mode != I387_CW_UNINITIALIZED)
17123 emit_i387_cw_initialization (mode);
17124 break;
17125 default:
17126 gcc_unreachable ();
17130 /* Output code for INSN to convert a float to a signed int. OPERANDS
17131 are the insn operands. The output may be [HSD]Imode and the input
17132 operand may be [SDX]Fmode. */
17134 const char *
17135 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17137 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17138 int dimode_p = GET_MODE (operands[0]) == DImode;
17139 int round_mode = get_attr_i387_cw (insn);
17141 /* Jump through a hoop or two for DImode, since the hardware has no
17142 non-popping instruction. We used to do this a different way, but
17143 that was somewhat fragile and broke with post-reload splitters. */
17144 if ((dimode_p || fisttp) && !stack_top_dies)
17145 output_asm_insn ("fld\t%y1", operands);
17147 gcc_assert (STACK_TOP_P (operands[1]));
17148 gcc_assert (MEM_P (operands[0]));
17149 gcc_assert (GET_MODE (operands[1]) != TFmode);
17151 if (fisttp)
17152 output_asm_insn ("fisttp%Z0\t%0", operands);
17153 else
17155 if (round_mode != I387_CW_ANY)
17156 output_asm_insn ("fldcw\t%3", operands);
17157 if (stack_top_dies || dimode_p)
17158 output_asm_insn ("fistp%Z0\t%0", operands);
17159 else
17160 output_asm_insn ("fist%Z0\t%0", operands);
17161 if (round_mode != I387_CW_ANY)
17162 output_asm_insn ("fldcw\t%2", operands);
17165 return "";
17168 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17169 have the values zero or one, indicates the ffreep insn's operand
17170 from the OPERANDS array. */
17172 static const char *
17173 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17175 if (TARGET_USE_FFREEP)
17176 #ifdef HAVE_AS_IX86_FFREEP
17177 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17178 #else
17180 static char retval[32];
17181 int regno = REGNO (operands[opno]);
17183 gcc_assert (STACK_REGNO_P (regno));
17185 regno -= FIRST_STACK_REG;
17187 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17188 return retval;
17190 #endif
17192 return opno ? "fstp\t%y1" : "fstp\t%y0";
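/* Encoding note: ffreep %st(N) is the two-byte opcode DF C0+N, so for
   regno 1 the snprintf above produces ASM_SHORT "0xc1df", which a
   little-endian .word directive lays out as the bytes DF C1. */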
17196 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17197 should be used. UNORDERED_P is true when fucom should be used. */
17199 const char *
17200 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17202 int stack_top_dies;
17203 rtx cmp_op0, cmp_op1;
17204 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17206 if (eflags_p)
17208 cmp_op0 = operands[0];
17209 cmp_op1 = operands[1];
17211 else
17213 cmp_op0 = operands[1];
17214 cmp_op1 = operands[2];
17217 if (is_sse)
17219 if (GET_MODE (operands[0]) == SFmode)
17220 if (unordered_p)
17221 return "%vucomiss\t{%1, %0|%0, %1}";
17222 else
17223 return "%vcomiss\t{%1, %0|%0, %1}";
17224 else
17225 if (unordered_p)
17226 return "%vucomisd\t{%1, %0|%0, %1}";
17227 else
17228 return "%vcomisd\t{%1, %0|%0, %1}";
17231 gcc_assert (STACK_TOP_P (cmp_op0));
17233 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17235 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17237 if (stack_top_dies)
17239 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17240 return output_387_ffreep (operands, 1);
17242 else
17243 return "ftst\n\tfnstsw\t%0";
17246 if (STACK_REG_P (cmp_op1)
17247 && stack_top_dies
17248 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17249 && REGNO (cmp_op1) != FIRST_STACK_REG)
17251 /* If both the top of the 387 stack and the other operand (also a
17252 stack register) die, then this must be a `fcompp' float
17253 compare. */
17255 if (eflags_p)
17257 /* There is no double popping fcomi variant. Fortunately,
17258 eflags is immune from the fstp's cc clobbering. */
17259 if (unordered_p)
17260 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17261 else
17262 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17263 return output_387_ffreep (operands, 0);
17265 else
17267 if (unordered_p)
17268 return "fucompp\n\tfnstsw\t%0";
17269 else
17270 return "fcompp\n\tfnstsw\t%0";
17273 else
17275 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17277 static const char * const alt[16] =
17279 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17280 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17281 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17282 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17284 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17285 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17286 NULL,
17287 NULL,
17289 "fcomi\t{%y1, %0|%0, %y1}",
17290 "fcomip\t{%y1, %0|%0, %y1}",
17291 "fucomi\t{%y1, %0|%0, %y1}",
17292 "fucomip\t{%y1, %0|%0, %y1}",
17294 NULL,
17295 NULL,
17296 NULL,
17297 NULL
17300 int mask;
17301 const char *ret;
17303 mask = eflags_p << 3;
17304 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17305 mask |= unordered_p << 1;
17306 mask |= stack_top_dies;
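/* Worked example: a dying-top fucomip has eflags_p = 1, a non-integer
   operand (intmode bit 0), unordered_p = 1 and stack_top_dies = 1,
   giving mask = (1<<3) | (1<<1) | 1 = 11, which selects "fucomip"
   in the table above. */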
17308 gcc_assert (mask < 16);
17309 ret = alt[mask];
17310 gcc_assert (ret);
17312 return ret;
17316 void
17317 ix86_output_addr_vec_elt (FILE *file, int value)
17319 const char *directive = ASM_LONG;
17321 #ifdef ASM_QUAD
17322 if (TARGET_LP64)
17323 directive = ASM_QUAD;
17324 #else
17325 gcc_assert (!TARGET_64BIT);
17326 #endif
17328 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17331 void
17332 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17334 const char *directive = ASM_LONG;
17336 #ifdef ASM_QUAD
17337 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17338 directive = ASM_QUAD;
17339 #else
17340 gcc_assert (!TARGET_64BIT);
17341 #endif
17342 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17343 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17344 fprintf (file, "%s%s%d-%s%d\n",
17345 directive, LPREFIX, value, LPREFIX, rel);
17346 else if (HAVE_AS_GOTOFF_IN_DATA)
17347 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17348 #if TARGET_MACHO
17349 else if (TARGET_MACHO)
17351 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17352 machopic_output_function_base_name (file);
17353 putc ('\n', file);
17355 #endif
17356 else
17357 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17358 GOT_SYMBOL_NAME, LPREFIX, value);
17361 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17362 for the target. */
17364 void
17365 ix86_expand_clear (rtx dest)
17367 rtx tmp;
17369 /* We play register width games, which are only valid after reload. */
17370 gcc_assert (reload_completed);
17372 /* Avoid HImode and its attendant prefix byte. */
17373 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17374 dest = gen_rtx_REG (SImode, REGNO (dest));
17375 tmp = gen_rtx_SET (dest, const0_rtx);
17377 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17379 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17380 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17383 emit_insn (tmp);
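/* Design note (well-known encodings): "xorl %eax, %eax" is 2 bytes
   while "movl $0, %eax" is 5, but the xor form clobbers the flags,
   hence the CLOBBER of FLAGS_REG attached to the SET above. */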
17386 /* X is an unchanging MEM. If it is a constant pool reference, return
17387 the constant pool rtx, else NULL. */
17390 maybe_get_pool_constant (rtx x)
17392 x = ix86_delegitimize_address (XEXP (x, 0));
17394 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17395 return get_pool_constant (x);
17397 return NULL_RTX;
17400 void
17401 ix86_expand_move (machine_mode mode, rtx operands[])
17403 rtx op0, op1;
17404 enum tls_model model;
17406 op0 = operands[0];
17407 op1 = operands[1];
17409 if (GET_CODE (op1) == SYMBOL_REF)
17411 rtx tmp;
17413 model = SYMBOL_REF_TLS_MODEL (op1);
17414 if (model)
17416 op1 = legitimize_tls_address (op1, model, true);
17417 op1 = force_operand (op1, op0);
17418 if (op1 == op0)
17419 return;
17420 op1 = convert_to_mode (mode, op1, 1);
17422 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17423 op1 = tmp;
17425 else if (GET_CODE (op1) == CONST
17426 && GET_CODE (XEXP (op1, 0)) == PLUS
17427 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17429 rtx addend = XEXP (XEXP (op1, 0), 1);
17430 rtx symbol = XEXP (XEXP (op1, 0), 0);
17431 rtx tmp;
17433 model = SYMBOL_REF_TLS_MODEL (symbol);
17434 if (model)
17435 tmp = legitimize_tls_address (symbol, model, true);
17436 else
17437 tmp = legitimize_pe_coff_symbol (symbol, true);
17439 if (tmp)
17441 tmp = force_operand (tmp, NULL);
17442 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17443 op0, 1, OPTAB_DIRECT);
17444 if (tmp == op0)
17445 return;
17446 op1 = convert_to_mode (mode, tmp, 1);
17450 if ((flag_pic || MACHOPIC_INDIRECT)
17451 && symbolic_operand (op1, mode))
17453 if (TARGET_MACHO && !TARGET_64BIT)
17455 #if TARGET_MACHO
17456 /* dynamic-no-pic */
17457 if (MACHOPIC_INDIRECT)
17459 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17460 ? op0 : gen_reg_rtx (Pmode);
17461 op1 = machopic_indirect_data_reference (op1, temp);
17462 if (MACHOPIC_PURE)
17463 op1 = machopic_legitimize_pic_address (op1, mode,
17464 temp == op1 ? 0 : temp);
17466 if (op0 != op1 && GET_CODE (op0) != MEM)
17468 rtx insn = gen_rtx_SET (op0, op1);
17469 emit_insn (insn);
17470 return;
17472 if (GET_CODE (op0) == MEM)
17473 op1 = force_reg (Pmode, op1);
17474 else
17476 rtx temp = op0;
17477 if (GET_CODE (temp) != REG)
17478 temp = gen_reg_rtx (Pmode);
17479 temp = legitimize_pic_address (op1, temp);
17480 if (temp == op0)
17481 return;
17482 op1 = temp;
17484 /* dynamic-no-pic */
17485 #endif
17487 else
17489 if (MEM_P (op0))
17490 op1 = force_reg (mode, op1);
17491 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17493 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17494 op1 = legitimize_pic_address (op1, reg);
17495 if (op0 == op1)
17496 return;
17497 op1 = convert_to_mode (mode, op1, 1);
17501 else
17503 if (MEM_P (op0)
17504 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17505 || !push_operand (op0, mode))
17506 && MEM_P (op1))
17507 op1 = force_reg (mode, op1);
17509 if (push_operand (op0, mode)
17510 && ! general_no_elim_operand (op1, mode))
17511 op1 = copy_to_mode_reg (mode, op1);
17513 /* Force large constants in 64bit compilation into a register
17514 to get them CSEed. */
17515 if (can_create_pseudo_p ()
17516 && (mode == DImode) && TARGET_64BIT
17517 && immediate_operand (op1, mode)
17518 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17519 && !register_operand (op0, mode)
17520 && optimize)
17521 op1 = copy_to_mode_reg (mode, op1);
17523 if (can_create_pseudo_p ()
17524 && CONST_DOUBLE_P (op1))
17526 /* If we are loading a floating point constant to a register,
17527 force the value to memory now, since we'll get better code
17528 out the back end. */
17530 op1 = validize_mem (force_const_mem (mode, op1));
17531 if (!register_operand (op0, mode))
17533 rtx temp = gen_reg_rtx (mode);
17534 emit_insn (gen_rtx_SET (temp, op1));
17535 emit_move_insn (op0, temp);
17536 return;
17541 emit_insn (gen_rtx_SET (op0, op1));
17544 void
17545 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17547 rtx op0 = operands[0], op1 = operands[1];
17548 unsigned int align = GET_MODE_ALIGNMENT (mode);
17550 if (push_operand (op0, VOIDmode))
17551 op0 = emit_move_resolve_push (mode, op0);
17553 /* Force constants other than zero into memory. We do not know how
17554 the instructions used to build constants modify the upper 64 bits
17555 of the register; once we have that information we may be able
17556 to handle some of them more efficiently. */
17557 if (can_create_pseudo_p ()
17558 && register_operand (op0, mode)
17559 && (CONSTANT_P (op1)
17560 || (SUBREG_P (op1)
17561 && CONSTANT_P (SUBREG_REG (op1))))
17562 && !standard_sse_constant_p (op1))
17563 op1 = validize_mem (force_const_mem (mode, op1));
17565 /* We need to check memory alignment for SSE mode since an attribute
17566 can make operands unaligned. */
17567 if (can_create_pseudo_p ()
17568 && SSE_REG_MODE_P (mode)
17569 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17570 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17572 rtx tmp[2];
17574 /* ix86_expand_vector_move_misalign() does not like constants ... */
17575 if (CONSTANT_P (op1)
17576 || (SUBREG_P (op1)
17577 && CONSTANT_P (SUBREG_REG (op1))))
17578 op1 = validize_mem (force_const_mem (mode, op1));
17580 /* ... nor both arguments in memory. */
17581 if (!register_operand (op0, mode)
17582 && !register_operand (op1, mode))
17583 op1 = force_reg (mode, op1);
17585 tmp[0] = op0; tmp[1] = op1;
17586 ix86_expand_vector_move_misalign (mode, tmp);
17587 return;
17590 /* Make operand1 a register if it isn't already. */
17591 if (can_create_pseudo_p ()
17592 && !register_operand (op0, mode)
17593 && !register_operand (op1, mode))
17595 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17596 return;
17599 emit_insn (gen_rtx_SET (op0, op1));
17602 /* Split 32-byte AVX unaligned load and store if needed. */
17604 static void
17605 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17607 rtx m;
17608 rtx (*extract) (rtx, rtx, rtx);
17609 rtx (*load_unaligned) (rtx, rtx);
17610 rtx (*store_unaligned) (rtx, rtx);
17611 machine_mode mode;
17613 switch (GET_MODE (op0))
17615 default:
17616 gcc_unreachable ();
17617 case V32QImode:
17618 extract = gen_avx_vextractf128v32qi;
17619 load_unaligned = gen_avx_loaddquv32qi;
17620 store_unaligned = gen_avx_storedquv32qi;
17621 mode = V16QImode;
17622 break;
17623 case V8SFmode:
17624 extract = gen_avx_vextractf128v8sf;
17625 load_unaligned = gen_avx_loadups256;
17626 store_unaligned = gen_avx_storeups256;
17627 mode = V4SFmode;
17628 break;
17629 case V4DFmode:
17630 extract = gen_avx_vextractf128v4df;
17631 load_unaligned = gen_avx_loadupd256;
17632 store_unaligned = gen_avx_storeupd256;
17633 mode = V2DFmode;
17634 break;
17637 if (MEM_P (op1))
17639 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17640 && optimize_insn_for_speed_p ())
17642 rtx r = gen_reg_rtx (mode);
17643 m = adjust_address (op1, mode, 0);
17644 emit_move_insn (r, m);
17645 m = adjust_address (op1, mode, 16);
17646 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17647 emit_move_insn (op0, r);
17649 /* Normal *mov<mode>_internal pattern will handle
17650 unaligned loads just fine if misaligned_operand
17651 is true, and without the UNSPEC it can be combined
17652 with arithmetic instructions. */
17653 else if (misaligned_operand (op1, GET_MODE (op1)))
17654 emit_insn (gen_rtx_SET (op0, op1));
17655 else
17656 emit_insn (load_unaligned (op0, op1));
17658 else if (MEM_P (op0))
17660 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17661 && optimize_insn_for_speed_p ())
17663 m = adjust_address (op0, mode, 0);
17664 emit_insn (extract (m, op1, const0_rtx));
17665 m = adjust_address (op0, mode, 16);
17666 emit_insn (extract (m, op1, const1_rtx));
17668 else
17669 emit_insn (store_unaligned (op0, op1));
17671 else
17672 gcc_unreachable ();
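/* Illustrative asm for the split forms above (AT&T syntax, hypothetical
   operands): a split 32-byte load becomes

     vmovups     (mem), %xmm0
     vinsertf128 $1, 16(mem), %ymm0, %ymm0

   and a split 32-byte store becomes

     vextractf128 $0, %ymm0, (mem)
     vextractf128 $1, %ymm0, 16(mem) */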
17675 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17676 straight to ix86_expand_vector_move. */
17677 /* Code generation for scalar reg-reg moves of single and double precision data:
17678 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17679 movaps reg, reg
17680 else
17681 movss reg, reg
17682 if (x86_sse_partial_reg_dependency == true)
17683 movapd reg, reg
17684 else
17685 movsd reg, reg
17687 Code generation for scalar loads of double precision data:
17688 if (x86_sse_split_regs == true)
17689 movlpd mem, reg (gas syntax)
17690 else
17691 movsd mem, reg
17693 Code generation for unaligned packed loads of single precision data
17694 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17695 if (x86_sse_unaligned_move_optimal)
17696 movups mem, reg
17698 if (x86_sse_partial_reg_dependency == true)
17700 xorps reg, reg
17701 movlps mem, reg
17702 movhps mem+8, reg
17704 else
17706 movlps mem, reg
17707 movhps mem+8, reg
17710 Code generation for unaligned packed loads of double precision data
17711 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17712 if (x86_sse_unaligned_move_optimal)
17713 movupd mem, reg
17715 if (x86_sse_split_regs == true)
17717 movlpd mem, reg
17718 movhpd mem+8, reg
17720 else
17722 movsd mem, reg
17723 movhpd mem+8, reg
17727 void
17728 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17730 rtx op0, op1, orig_op0 = NULL_RTX, m;
17731 rtx (*load_unaligned) (rtx, rtx);
17732 rtx (*store_unaligned) (rtx, rtx);
17734 op0 = operands[0];
17735 op1 = operands[1];
17737 if (GET_MODE_SIZE (mode) == 64)
17739 switch (GET_MODE_CLASS (mode))
17741 case MODE_VECTOR_INT:
17742 case MODE_INT:
17743 if (GET_MODE (op0) != V16SImode)
17745 if (!MEM_P (op0))
17747 orig_op0 = op0;
17748 op0 = gen_reg_rtx (V16SImode);
17750 else
17751 op0 = gen_lowpart (V16SImode, op0);
17753 op1 = gen_lowpart (V16SImode, op1);
17754 /* FALLTHRU */
17756 case MODE_VECTOR_FLOAT:
17757 switch (GET_MODE (op0))
17759 default:
17760 gcc_unreachable ();
17761 case V16SImode:
17762 load_unaligned = gen_avx512f_loaddquv16si;
17763 store_unaligned = gen_avx512f_storedquv16si;
17764 break;
17765 case V16SFmode:
17766 load_unaligned = gen_avx512f_loadups512;
17767 store_unaligned = gen_avx512f_storeups512;
17768 break;
17769 case V8DFmode:
17770 load_unaligned = gen_avx512f_loadupd512;
17771 store_unaligned = gen_avx512f_storeupd512;
17772 break;
17775 if (MEM_P (op1))
17776 emit_insn (load_unaligned (op0, op1));
17777 else if (MEM_P (op0))
17778 emit_insn (store_unaligned (op0, op1));
17779 else
17780 gcc_unreachable ();
17781 if (orig_op0)
17782 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17783 break;
17785 default:
17786 gcc_unreachable ();
17789 return;
17792 if (TARGET_AVX
17793 && GET_MODE_SIZE (mode) == 32)
17795 switch (GET_MODE_CLASS (mode))
17797 case MODE_VECTOR_INT:
17798 case MODE_INT:
17799 if (GET_MODE (op0) != V32QImode)
17801 if (!MEM_P (op0))
17803 orig_op0 = op0;
17804 op0 = gen_reg_rtx (V32QImode);
17806 else
17807 op0 = gen_lowpart (V32QImode, op0);
17809 op1 = gen_lowpart (V32QImode, op1);
17810 /* FALLTHRU */
17812 case MODE_VECTOR_FLOAT:
17813 ix86_avx256_split_vector_move_misalign (op0, op1);
17814 if (orig_op0)
17815 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17816 break;
17818 default:
17819 gcc_unreachable ();
17822 return;
17825 if (MEM_P (op1))
17827 /* Normal *mov<mode>_internal pattern will handle
17828 unaligned loads just fine if misaligned_operand
17829 is true, and without the UNSPEC it can be combined
17830 with arithmetic instructions. */
17831 if (TARGET_AVX
17832 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17833 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17834 && misaligned_operand (op1, GET_MODE (op1)))
17835 emit_insn (gen_rtx_SET (op0, op1));
17836 /* ??? If we have typed data, then it would appear that using
17837 movdqu is the only way to get unaligned data loaded with
17838 integer type. */
17839 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17841 if (GET_MODE (op0) != V16QImode)
17843 orig_op0 = op0;
17844 op0 = gen_reg_rtx (V16QImode);
17846 op1 = gen_lowpart (V16QImode, op1);
17847 /* We will eventually emit movups based on insn attributes. */
17848 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17849 if (orig_op0)
17850 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17852 else if (TARGET_SSE2 && mode == V2DFmode)
17854 rtx zero;
17856 if (TARGET_AVX
17857 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17858 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17859 || optimize_insn_for_size_p ())
17861 /* We will eventually emit movups based on insn attributes. */
17862 emit_insn (gen_sse2_loadupd (op0, op1));
17863 return;
17866 /* When SSE registers are split into halves, we can avoid
17867 writing to the top half twice. */
17868 if (TARGET_SSE_SPLIT_REGS)
17870 emit_clobber (op0);
17871 zero = op0;
17873 else
17875 /* ??? Not sure about the best option for the Intel chips.
17876 The following would seem to satisfy; the register is
17877 entirely cleared, breaking the dependency chain. We
17878 then store to the upper half, with a dependency depth
17879 of one. A rumor has it that Intel recommends two movsd
17880 followed by an unpacklpd, but this is unconfirmed. And
17881 given that the dependency depth of the unpacklpd would
17882 still be one, I'm not sure why this would be better. */
17883 zero = CONST0_RTX (V2DFmode);
17886 m = adjust_address (op1, DFmode, 0);
17887 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17888 m = adjust_address (op1, DFmode, 8);
17889 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17891 else
17893 rtx t;
17895 if (TARGET_AVX
17896 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17897 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17898 || optimize_insn_for_size_p ())
17900 if (GET_MODE (op0) != V4SFmode)
17902 orig_op0 = op0;
17903 op0 = gen_reg_rtx (V4SFmode);
17905 op1 = gen_lowpart (V4SFmode, op1);
17906 emit_insn (gen_sse_loadups (op0, op1));
17907 if (orig_op0)
17908 emit_move_insn (orig_op0,
17909 gen_lowpart (GET_MODE (orig_op0), op0));
17910 return;
17913 if (mode != V4SFmode)
17914 t = gen_reg_rtx (V4SFmode);
17915 else
17916 t = op0;
17918 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17919 emit_move_insn (t, CONST0_RTX (V4SFmode));
17920 else
17921 emit_clobber (t);
17923 m = adjust_address (op1, V2SFmode, 0);
17924 emit_insn (gen_sse_loadlps (t, t, m));
17925 m = adjust_address (op1, V2SFmode, 8);
17926 emit_insn (gen_sse_loadhps (t, t, m));
17927 if (mode != V4SFmode)
17928 emit_move_insn (op0, gen_lowpart (mode, t));
17931 else if (MEM_P (op0))
17933 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17935 op0 = gen_lowpart (V16QImode, op0);
17936 op1 = gen_lowpart (V16QImode, op1);
17937 /* We will eventually emit movups based on insn attributes. */
17938 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17940 else if (TARGET_SSE2 && mode == V2DFmode)
17942 if (TARGET_AVX
17943 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17944 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17945 || optimize_insn_for_size_p ())
17946 /* We will eventually emit movups based on insn attributes. */
17947 emit_insn (gen_sse2_storeupd (op0, op1));
17948 else
17950 m = adjust_address (op0, DFmode, 0);
17951 emit_insn (gen_sse2_storelpd (m, op1));
17952 m = adjust_address (op0, DFmode, 8);
17953 emit_insn (gen_sse2_storehpd (m, op1));
17956 else
17958 if (mode != V4SFmode)
17959 op1 = gen_lowpart (V4SFmode, op1);
17961 if (TARGET_AVX
17962 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17963 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17964 || optimize_insn_for_size_p ())
17966 op0 = gen_lowpart (V4SFmode, op0);
17967 emit_insn (gen_sse_storeups (op0, op1));
17969 else
17971 m = adjust_address (op0, V2SFmode, 0);
17972 emit_insn (gen_sse_storelps (m, op1));
17973 m = adjust_address (op0, V2SFmode, 8);
17974 emit_insn (gen_sse_storehps (m, op1));
17978 else
17979 gcc_unreachable ();
17982 /* Helper function of ix86_fixup_binary_operands to canonicalize
17983 operand order. Returns true if the operands should be swapped. */
17985 static bool
17986 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17987 rtx operands[])
17989 rtx dst = operands[0];
17990 rtx src1 = operands[1];
17991 rtx src2 = operands[2];
17993 /* If the operation is not commutative, we can't do anything. */
17994 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17995 return false;
17997 /* Highest priority is that src1 should match dst. */
17998 if (rtx_equal_p (dst, src1))
17999 return false;
18000 if (rtx_equal_p (dst, src2))
18001 return true;
18003 /* Next highest priority is that immediate constants come second. */
18004 if (immediate_operand (src2, mode))
18005 return false;
18006 if (immediate_operand (src1, mode))
18007 return true;
18009 /* Lowest priority is that memory references should come second. */
18010 if (MEM_P (src2))
18011 return false;
18012 if (MEM_P (src1))
18013 return true;
18015 return false;
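/* Example: for a commutative dst = src1 + src2 where src2 is the same
   register as dst, the swap yields (dst, dst, src1), letting the insn
   use its matching-destination form directly. */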
18019 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
18020 destination to use for the operation. If different from the true
18021 destination in operands[0], a copy operation will be required. */
18024 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
18025 rtx operands[])
18027 rtx dst = operands[0];
18028 rtx src1 = operands[1];
18029 rtx src2 = operands[2];
18031 /* Canonicalize operand order. */
18032 if (ix86_swap_binary_operands_p (code, mode, operands))
18034 /* It is invalid to swap operands of different modes. */
18035 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
18037 std::swap (src1, src2);
18040 /* Both source operands cannot be in memory. */
18041 if (MEM_P (src1) && MEM_P (src2))
18043 /* Optimization: Only read from memory once. */
18044 if (rtx_equal_p (src1, src2))
18046 src2 = force_reg (mode, src2);
18047 src1 = src2;
18049 else if (rtx_equal_p (dst, src1))
18050 src2 = force_reg (mode, src2);
18051 else
18052 src1 = force_reg (mode, src1);
18055 /* If the destination is memory, and we do not have matching source
18056 operands, do things in registers. */
18057 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18058 dst = gen_reg_rtx (mode);
18060 /* Source 1 cannot be a constant. */
18061 if (CONSTANT_P (src1))
18062 src1 = force_reg (mode, src1);
18064 /* Source 1 cannot be a non-matching memory. */
18065 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18066 src1 = force_reg (mode, src1);
18068 /* Improve address combine. */
18069 if (code == PLUS
18070 && GET_MODE_CLASS (mode) == MODE_INT
18071 && MEM_P (src2))
18072 src2 = force_reg (mode, src2);
18074 operands[1] = src1;
18075 operands[2] = src2;
18076 return dst;
18079 /* Similarly, but assume that the destination has already been
18080 set up properly. */
18082 void
18083 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18084 machine_mode mode, rtx operands[])
18086 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18087 gcc_assert (dst == operands[0]);
18090 /* Attempt to expand a binary operator. Make the expansion closer to the
18091 actual machine than just general_operand, which will allow 3 separate
18092 memory references (one output, two input) in a single insn. */
18094 void
18095 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18096 rtx operands[])
18098 rtx src1, src2, dst, op, clob;
18100 dst = ix86_fixup_binary_operands (code, mode, operands);
18101 src1 = operands[1];
18102 src2 = operands[2];
18104 /* Emit the instruction. */
18106 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18108 if (reload_completed
18109 && code == PLUS
18110 && !rtx_equal_p (dst, src1))
18112 /* This is going to be an LEA; avoid splitting it later. */
18113 emit_insn (op);
18115 else
18117 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18118 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18121 /* Fix up the destination if needed. */
18122 if (dst != operands[0])
18123 emit_move_insn (operands[0], dst);
18126 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18127 the given OPERANDS. */
18129 void
18130 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18131 rtx operands[])
18133 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18134 if (SUBREG_P (operands[1]))
18136 op1 = operands[1];
18137 op2 = operands[2];
18139 else if (SUBREG_P (operands[2]))
18141 op1 = operands[2];
18142 op2 = operands[1];
18144 /* Optimize (__m128i) d | (__m128i) e and similar code
18145 when d and e are float vectors into a float vector logical
18146 insn. In C/C++ without using intrinsics there is no other way
18147 to express a vector logical operation on float vectors than
18148 to cast them temporarily to integer vectors. */
18149 if (op1
18150 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18151 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
18152 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18153 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18154 && SUBREG_BYTE (op1) == 0
18155 && (GET_CODE (op2) == CONST_VECTOR
18156 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18157 && SUBREG_BYTE (op2) == 0))
18158 && can_create_pseudo_p ())
18160 rtx dst;
18161 switch (GET_MODE (SUBREG_REG (op1)))
18163 case V4SFmode:
18164 case V8SFmode:
18165 case V16SFmode:
18166 case V2DFmode:
18167 case V4DFmode:
18168 case V8DFmode:
18169 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18170 if (GET_CODE (op2) == CONST_VECTOR)
18172 op2 = gen_lowpart (GET_MODE (dst), op2);
18173 op2 = force_reg (GET_MODE (dst), op2);
18175 else
18177 op1 = operands[1];
18178 op2 = SUBREG_REG (operands[2]);
18179 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18180 op2 = force_reg (GET_MODE (dst), op2);
18182 op1 = SUBREG_REG (op1);
18183 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18184 op1 = force_reg (GET_MODE (dst), op1);
18185 emit_insn (gen_rtx_SET (dst,
18186 gen_rtx_fmt_ee (code, GET_MODE (dst),
18187 op1, op2)));
18188 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18189 return;
18190 default:
18191 break;
18194 if (!nonimmediate_operand (operands[1], mode))
18195 operands[1] = force_reg (mode, operands[1]);
18196 if (!nonimmediate_operand (operands[2], mode))
18197 operands[2] = force_reg (mode, operands[2]);
18198 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18199 emit_insn (gen_rtx_SET (operands[0],
18200 gen_rtx_fmt_ee (code, mode, operands[1],
18201 operands[2])));
18204 /* Return TRUE or FALSE depending on whether the binary operator meets the
18205 appropriate constraints. */
18207 bool
18208 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18209 rtx operands[3])
18211 rtx dst = operands[0];
18212 rtx src1 = operands[1];
18213 rtx src2 = operands[2];
18215 /* Both source operands cannot be in memory. */
18216 if (MEM_P (src1) && MEM_P (src2))
18217 return false;
18219 /* Canonicalize operand order for commutative operators. */
18220 if (ix86_swap_binary_operands_p (code, mode, operands))
18221 std::swap (src1, src2);
18223 /* If the destination is memory, we must have a matching source operand. */
18224 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18225 return false;
18227 /* Source 1 cannot be a constant. */
18228 if (CONSTANT_P (src1))
18229 return false;
18231 /* Source 1 cannot be a non-matching memory. */
18232 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18233 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18234 return (code == AND
18235 && (mode == HImode
18236 || mode == SImode
18237 || (TARGET_64BIT && mode == DImode))
18238 && satisfies_constraint_L (src2));
18240 return true;
18243 /* Attempt to expand a unary operator. Make the expansion closer to the
18244 actual machine than just general_operand, which will allow 2 separate
18245 memory references (one output, one input) in a single insn. */
18247 void
18248 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18249 rtx operands[])
18251 bool matching_memory = false;
18252 rtx src, dst, op, clob;
18254 dst = operands[0];
18255 src = operands[1];
18257 /* If the destination is memory, and we do not have matching source
18258 operands, do things in registers. */
18259 if (MEM_P (dst))
18261 if (rtx_equal_p (dst, src))
18262 matching_memory = true;
18263 else
18264 dst = gen_reg_rtx (mode);
18267 /* When source operand is memory, destination must match. */
18268 if (MEM_P (src) && !matching_memory)
18269 src = force_reg (mode, src);
18271 /* Emit the instruction. */
18273 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
18275 if (code == NOT)
18276 emit_insn (op);
18277 else
18279 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18280 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18283 /* Fix up the destination if needed. */
18284 if (dst != operands[0])
18285 emit_move_insn (operands[0], dst);
18288 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18289 divisor are within the range [0-255]. */
18291 void
18292 ix86_split_idivmod (machine_mode mode, rtx operands[],
18293 bool signed_p)
18295 rtx_code_label *end_label, *qimode_label;
18296 rtx insn, div, mod;
18297 rtx scratch, tmp0, tmp1, tmp2;
18298 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18299 rtx (*gen_zero_extend) (rtx, rtx);
18300 rtx (*gen_test_ccno_1) (rtx, rtx);
18302 switch (mode)
18304 case SImode:
18305 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18306 gen_test_ccno_1 = gen_testsi_ccno_1;
18307 gen_zero_extend = gen_zero_extendqisi2;
18308 break;
18309 case DImode:
18310 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18311 gen_test_ccno_1 = gen_testdi_ccno_1;
18312 gen_zero_extend = gen_zero_extendqidi2;
18313 break;
18314 default:
18315 gcc_unreachable ();
18318 end_label = gen_label_rtx ();
18319 qimode_label = gen_label_rtx ();
18321 scratch = gen_reg_rtx (mode);
18323 /* Use 8bit unsigned divmod if dividend and divisor are within
18324 the range [0-255]. */
18325 emit_move_insn (scratch, operands[2]);
18326 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18327 scratch, 1, OPTAB_DIRECT);
18328 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18329 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18330 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18331 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18332 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18333 pc_rtx);
18334 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
18335 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18336 JUMP_LABEL (insn) = qimode_label;
18338 /* Generate original signed/unsigned divmod. */
18339 div = gen_divmod4_1 (operands[0], operands[1],
18340 operands[2], operands[3]);
18341 emit_insn (div);
18343 /* Branch to the end. */
18344 emit_jump_insn (gen_jump (end_label));
18345 emit_barrier ();
18347 /* Generate 8bit unsigned divide. */
18348 emit_label (qimode_label);
18349 /* Don't use operands[0] for result of 8bit divide since not all
18350 registers support QImode ZERO_EXTRACT. */
18351 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18352 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18353 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18354 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18356 if (signed_p)
18358 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18359 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18361 else
18363 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18364 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18367 /* Extract remainder from AH. */
18368 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18369 if (REG_P (operands[1]))
18370 insn = emit_move_insn (operands[1], tmp1);
18371 else
18373 /* Need a new scratch register since the old one has result
18374 of 8bit divide. */
18375 scratch = gen_reg_rtx (mode);
18376 emit_move_insn (scratch, tmp1);
18377 insn = emit_move_insn (operands[1], scratch);
18379 set_unique_reg_note (insn, REG_EQUAL, mod);
18381 /* Zero extend quotient from AL. */
18382 tmp1 = gen_lowpart (QImode, tmp0);
18383 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18384 set_unique_reg_note (insn, REG_EQUAL, div);
18386 emit_label (end_label);
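/* Sketch of the code generated above (illustrative asm, unsigned
   SImode case; register and label names are hypothetical):

     movl  dividend, %tmp
     orl   divisor, %tmp
     testl $-0x100, %tmp     # do both operands fit in [0-255]?
     je    .Lqimode
     xorl  %edx, %edx
     divl  divisor           # full 32-bit divide
     jmp   .Lend
   .Lqimode:
     divb  divisor           # AX / r8: AL = quotient, AH = remainder
   .Lend: */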
18389 #define LEA_MAX_STALL (3)
18390 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18392 /* Increase given DISTANCE in half-cycles according to
18393 dependencies between PREV and NEXT instructions.
18394 Add 1 half-cycle if there is no dependency and
18395 go to the next cycle if there is some dependency. */
18397 static unsigned int
18398 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18400 df_ref def, use;
18402 if (!prev || !next)
18403 return distance + (distance & 1) + 2;
18405 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18406 return distance + 1;
18408 FOR_EACH_INSN_USE (use, next)
18409 FOR_EACH_INSN_DEF (def, prev)
18410 if (!DF_REF_IS_ARTIFICIAL (def)
18411 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18412 return distance + (distance & 1) + 2;
18414 return distance + 1;
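/* Accounting example: with no dependency each step adds one half-cycle
   (distance + 1); on a dependency the distance is first rounded up to
   a whole cycle and then advanced by a full one, so from 3 it goes to
   3 + 1 + 2 = 6 half-cycles. */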
18417 /* Function checks if instruction INSN defines register number
18418 REGNO1 or REGNO2. */
18420 static bool
18421 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18422 rtx_insn *insn)
18424 df_ref def;
18426 FOR_EACH_INSN_DEF (def, insn)
18427 if (DF_REF_REG_DEF_P (def)
18428 && !DF_REF_IS_ARTIFICIAL (def)
18429 && (regno1 == DF_REF_REGNO (def)
18430 || regno2 == DF_REF_REGNO (def)))
18431 return true;
18433 return false;
18436 /* Function checks if instruction INSN uses register number
18437 REGNO as a part of address expression. */
18439 static bool
18440 insn_uses_reg_mem (unsigned int regno, rtx insn)
18442 df_ref use;
18444 FOR_EACH_INSN_USE (use, insn)
18445 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18446 return true;
18448 return false;
18451 /* Search backward for non-agu definition of register number REGNO1
18452 or register number REGNO2 in basic block starting from instruction
18453 START up to head of basic block or instruction INSN.
18455 Puts true into *FOUND if a definition was found and false
18456 otherwise.
18458 Distance in half-cycles between START and found instruction or head
18459 of BB is added to DISTANCE and returned. */
18461 static int
18462 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18463 rtx_insn *insn, int distance,
18464 rtx_insn *start, bool *found)
18466 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18467 rtx_insn *prev = start;
18468 rtx_insn *next = NULL;
18470 *found = false;
18472 while (prev
18473 && prev != insn
18474 && distance < LEA_SEARCH_THRESHOLD)
18476 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18478 distance = increase_distance (prev, next, distance);
18479 if (insn_defines_reg (regno1, regno2, prev))
18481 if (recog_memoized (prev) < 0
18482 || get_attr_type (prev) != TYPE_LEA)
18484 *found = true;
18485 return distance;
18489 next = prev;
18491 if (prev == BB_HEAD (bb))
18492 break;
18494 prev = PREV_INSN (prev);
18497 return distance;
18500 /* Search backward for non-agu definition of register number REGNO1
18501 or register number REGNO2 in INSN's basic block until
18502 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18503 2. Reach neighbour BBs boundary, or
18504 3. Reach agu definition.
18505 Returns the distance between the non-agu definition point and INSN.
18506 If no definition point, returns -1. */
18508 static int
18509 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18510 rtx_insn *insn)
18512 basic_block bb = BLOCK_FOR_INSN (insn);
18513 int distance = 0;
18514 bool found = false;
18516 if (insn != BB_HEAD (bb))
18517 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18518 distance, PREV_INSN (insn),
18519 &found);
18521 if (!found && distance < LEA_SEARCH_THRESHOLD)
18523 edge e;
18524 edge_iterator ei;
18525 bool simple_loop = false;
18527 FOR_EACH_EDGE (e, ei, bb->preds)
18528 if (e->src == bb)
18530 simple_loop = true;
18531 break;
18534 if (simple_loop)
18535 distance = distance_non_agu_define_in_bb (regno1, regno2,
18536 insn, distance,
18537 BB_END (bb), &found);
18538 else
18540 int shortest_dist = -1;
18541 bool found_in_bb = false;
18543 FOR_EACH_EDGE (e, ei, bb->preds)
18545 int bb_dist
18546 = distance_non_agu_define_in_bb (regno1, regno2,
18547 insn, distance,
18548 BB_END (e->src),
18549 &found_in_bb);
18550 if (found_in_bb)
18552 if (shortest_dist < 0)
18553 shortest_dist = bb_dist;
18554 else if (bb_dist > 0)
18555 shortest_dist = MIN (bb_dist, shortest_dist);
18557 found = true;
18561 distance = shortest_dist;
18565 /* get_attr_type may modify recog data. We want to make sure
18566 that recog data is valid for instruction INSN, on which
18567 distance_non_agu_define is called. INSN is unchanged here. */
18568 extract_insn_cached (insn);
18570 if (!found)
18571 return -1;
18573 return distance >> 1;
18576 /* Return the distance in half-cycles between INSN and the next
18577 insn that uses register number REGNO in a memory address, added
18578 to DISTANCE. Return -1 if REGNO is set.
18580 Set *FOUND to true if a register use was found and
18581 to false otherwise.
18582 Set *REDEFINED to true if a register redefinition was
18583 found and to false otherwise. */
18585 static int
18586 distance_agu_use_in_bb (unsigned int regno,
18587 rtx_insn *insn, int distance, rtx_insn *start,
18588 bool *found, bool *redefined)
18590 basic_block bb = NULL;
18591 rtx_insn *next = start;
18592 rtx_insn *prev = NULL;
18594 *found = false;
18595 *redefined = false;
18597 if (start != NULL_RTX)
18599 bb = BLOCK_FOR_INSN (start);
18600 if (start != BB_HEAD (bb))
18601 /* If insn and start belong to the same bb, set prev to insn,
18602 so the call to increase_distance will increase the distance
18603 between insns by 1. */
18604 prev = insn;
18607 while (next
18608 && next != insn
18609 && distance < LEA_SEARCH_THRESHOLD)
18611 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18613 distance = increase_distance (prev, next, distance);
18614 if (insn_uses_reg_mem (regno, next))
18616 /* Return DISTANCE if OP0 is used in a memory
18617 address in NEXT. */
18618 *found = true;
18619 return distance;
18622 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18624 /* Return -1 if OP0 is set in NEXT. */
18625 *redefined = true;
18626 return -1;
18629 prev = next;
18632 if (next == BB_END (bb))
18633 break;
18635 next = NEXT_INSN (next);
18638 return distance;
18641 /* Return the distance between INSN and the next insn that uses
18642 register number REGNO0 in a memory address. Return -1 if no
18643 such use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18645 static int
18646 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18648 basic_block bb = BLOCK_FOR_INSN (insn);
18649 int distance = 0;
18650 bool found = false;
18651 bool redefined = false;
18653 if (insn != BB_END (bb))
18654 distance = distance_agu_use_in_bb (regno0, insn, distance,
18655 NEXT_INSN (insn),
18656 &found, &redefined);
18658 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18660 edge e;
18661 edge_iterator ei;
18662 bool simple_loop = false;
18664 FOR_EACH_EDGE (e, ei, bb->succs)
18665 if (e->dest == bb)
18667 simple_loop = true;
18668 break;
18671 if (simple_loop)
18672 distance = distance_agu_use_in_bb (regno0, insn,
18673 distance, BB_HEAD (bb),
18674 &found, &redefined);
18675 else
18677 int shortest_dist = -1;
18678 bool found_in_bb = false;
18679 bool redefined_in_bb = false;
18681 FOR_EACH_EDGE (e, ei, bb->succs)
18683 int bb_dist
18684 = distance_agu_use_in_bb (regno0, insn,
18685 distance, BB_HEAD (e->dest),
18686 &found_in_bb, &redefined_in_bb);
18687 if (found_in_bb)
18689 if (shortest_dist < 0)
18690 shortest_dist = bb_dist;
18691 else if (bb_dist > 0)
18692 shortest_dist = MIN (bb_dist, shortest_dist);
18694 found = true;
18698 distance = shortest_dist;
18702 if (!found || redefined)
18703 return -1;
18705 return distance >> 1;
18708 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18709 there is a choice between LEA and ADD.
18710 Negative value: ADD is preferred over LEA
18711 Zero: Neutral
18712 Positive value: LEA is preferred over ADD. */
18713 #define IX86_LEA_PRIORITY 0
18715 /* Return true if the use of lea INSN has a performance advantage
18716 over a sequence of instructions. The instruction sequence has
18717 SPLIT_COST cycles higher latency than lea latency. */
18719 static bool
18720 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18721 unsigned int regno2, int split_cost, bool has_scale)
18723 int dist_define, dist_use;
18725 /* For Silvermont, if using a 2-source or 3-source LEA for
18726 non-destructive destination purposes, or because we want the
18727 ability to use SCALE, the use of LEA is justified. */
18728 if (TARGET_SILVERMONT || TARGET_INTEL)
18730 if (has_scale)
18731 return true;
18732 if (split_cost < 1)
18733 return false;
18734 if (regno0 == regno1 || regno0 == regno2)
18735 return false;
18736 return true;
18739 dist_define = distance_non_agu_define (regno1, regno2, insn);
18740 dist_use = distance_agu_use (regno0, insn);
18742 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18744 /* If there is no non-AGU operand definition, no AGU
18745 operand usage and the split cost is 0, then both the lea
18746 and non-lea variants have the same priority. Currently
18747 we prefer lea for 64-bit code and non-lea for 32-bit
18748 code. */
18749 if (dist_use < 0 && split_cost == 0)
18750 return TARGET_64BIT || IX86_LEA_PRIORITY;
18751 else
18752 return true;
18755 /* The longer the distance to the definition, the more preferable
18756 lea is. Here we adjust it to take into account the splitting
18757 cost and lea priority. */
18758 dist_define += split_cost + IX86_LEA_PRIORITY;
18760 /* If there is no use in a memory address then we just check
18761 that the split cost exceeds the AGU stall. */
18762 if (dist_use < 0)
18763 return dist_define > LEA_MAX_STALL;
18765 /* If this insn has both backward non-agu dependence and forward
18766 agu dependence, the one with short distance takes effect. */
18767 return dist_define >= dist_use;
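/* Worked example (values are illustrative): with dist_define == 1 cycle,
   dist_use == 2 cycles and split_cost == 1, the adjusted dist_define
   becomes 1 + 1 + IX86_LEA_PRIORITY == 2, and 2 >= 2 means the lea
   form is kept.  */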
18770 /* Return true if it is legal to clobber flags by INSN and
18771 false otherwise. */
18773 static bool
18774 ix86_ok_to_clobber_flags (rtx_insn *insn)
18776 basic_block bb = BLOCK_FOR_INSN (insn);
18777 df_ref use;
18778 bitmap live;
18780 while (insn)
18782 if (NONDEBUG_INSN_P (insn))
18784 FOR_EACH_INSN_USE (use, insn)
18785 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18786 return false;
18788 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18789 return true;
18792 if (insn == BB_END (bb))
18793 break;
18795 insn = NEXT_INSN (insn);
18798 live = df_get_live_out (bb);
18799 return !REGNO_REG_SET_P (live, FLAGS_REG);
18802 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18803 move and add to avoid AGU stalls. */
18805 bool
18806 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18808 unsigned int regno0, regno1, regno2;
18810 /* Check if we need to optimize. */
18811 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18812 return false;
18814 /* Check it is correct to split here. */
18815 if (!ix86_ok_to_clobber_flags (insn))
18816 return false;
18818 regno0 = true_regnum (operands[0]);
18819 regno1 = true_regnum (operands[1]);
18820 regno2 = true_regnum (operands[2]);
18822 /* We need to split only adds with a non-destructive
18823 destination operand. */
18824 if (regno0 == regno1 || regno0 == regno2)
18825 return false;
18826 else
18827 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
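/* For illustration (hypothetical operands): for a = b + c with a distinct
   from both b and c, e.g. "lea (%rdi,%rsi), %rax", the split form would be
   roughly "mov %rdi, %rax; add %rsi, %rax", at the extra cost of one mov;
   hence the SPLIT_COST of 1 passed above.  */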
18830 /* Return true if we should emit lea instruction instead of mov
18831 instruction. */
18833 bool
18834 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18836 unsigned int regno0, regno1;
18838 /* Check if we need to optimize. */
18839 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18840 return false;
18842 /* Use lea for reg to reg moves only. */
18843 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18844 return false;
18846 regno0 = true_regnum (operands[0]);
18847 regno1 = true_regnum (operands[1]);
18849 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18852 /* Return true if we need to split lea into a sequence of
18853 instructions to avoid AGU stalls. */
18855 bool
18856 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18858 unsigned int regno0, regno1, regno2;
18859 int split_cost;
18860 struct ix86_address parts;
18861 int ok;
18863 /* Check we need to optimize. */
18864 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18865 return false;
18867 /* The "at least two components" test below might not catch simple
18868 move or zero extension insns if parts.base is non-NULL and parts.disp
18869 is const0_rtx as the only components in the address, e.g. if the
18870 register is %rbp or %r13. As this test is much cheaper and moves or
18871 zero extensions are the common case, do this check first. */
18872 if (REG_P (operands[1])
18873 || (SImode_address_operand (operands[1], VOIDmode)
18874 && REG_P (XEXP (operands[1], 0))))
18875 return false;
18877 /* Check if it is OK to split here. */
18878 if (!ix86_ok_to_clobber_flags (insn))
18879 return false;
18881 ok = ix86_decompose_address (operands[1], &parts);
18882 gcc_assert (ok);
18884 /* There should be at least two components in the address. */
18885 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18886 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18887 return false;
18889 /* We should not split into add if non legitimate pic
18890 operand is used as displacement. */
18891 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18892 return false;
18894 regno0 = true_regnum (operands[0]);
18895 regno1 = INVALID_REGNUM;
18896 regno2 = INVALID_REGNUM;
18898 if (parts.base)
18899 regno1 = true_regnum (parts.base);
18900 if (parts.index)
18901 regno2 = true_regnum (parts.index);
18903 split_cost = 0;
18905 /* Compute how many cycles we will add to the execution time
18906 if we split the lea into a sequence of instructions. */
18907 if (parts.base || parts.index)
18909 /* Have to use a mov instruction if the non-destructive
18910 destination form is used. */
18911 if (regno1 != regno0 && regno2 != regno0)
18912 split_cost += 1;
18914 /* Have to add index to base if both exist. */
18915 if (parts.base && parts.index)
18916 split_cost += 1;
18918 /* Have to use shift and adds if scale is 2 or greater. */
18919 if (parts.scale > 1)
18921 if (regno0 != regno1)
18922 split_cost += 1;
18923 else if (regno2 == regno0)
18924 split_cost += 4;
18925 else
18926 split_cost += parts.scale;
18929 /* Have to use an add instruction with an immediate if
18930 disp is nonzero. */
18931 if (parts.disp && parts.disp != const0_rtx)
18932 split_cost += 1;
18934 /* Subtract the price of lea. */
18935 split_cost -= 1;
18938 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18939 parts.scale > 1);
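/* Worked example of the cost computation above (illustrative only): for
   "lea 4(%rbx,%rcx,2), %rax" we need a mov (+1), an add of base and index
   (+1), a shift for scale 2 (+1) and an add of the displacement (+1),
   minus the lea itself (-1), giving split_cost == 3.  */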
18942 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18943 matches destination. RTX includes clobber of FLAGS_REG. */
18945 static void
18946 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18947 rtx dst, rtx src)
18949 rtx op, clob;
18951 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
18952 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18954 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
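/* The emitted RTL has the shape
     (parallel [(set (reg) (code (reg) (src)))
                (clobber (reg:CC flags))])
   which is the form the flag-clobbering arithmetic patterns expect.  */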
18957 /* Return true if REGNO1's definition is nearer to INSN than REGNO2's. */
18959 static bool
18960 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18962 rtx_insn *prev = insn;
18963 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18965 if (insn == start)
18966 return false;
18967 while (prev && prev != start)
18969 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18971 prev = PREV_INSN (prev);
18972 continue;
18974 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18975 return true;
18976 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18977 return false;
18978 prev = PREV_INSN (prev);
18981 /* None of the regs is defined in the bb. */
18982 return false;
18985 /* Split lea instructions into a sequence of instructions
18986 which are executed on ALU to avoid AGU stalls.
18987 It is assumed that it is allowed to clobber flags register
18988 at lea position. */
18990 void
18991 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18993 unsigned int regno0, regno1, regno2;
18994 struct ix86_address parts;
18995 rtx target, tmp;
18996 int ok, adds;
18998 ok = ix86_decompose_address (operands[1], &parts);
18999 gcc_assert (ok);
19001 target = gen_lowpart (mode, operands[0]);
19003 regno0 = true_regnum (target);
19004 regno1 = INVALID_REGNUM;
19005 regno2 = INVALID_REGNUM;
19007 if (parts.base)
19009 parts.base = gen_lowpart (mode, parts.base);
19010 regno1 = true_regnum (parts.base);
19013 if (parts.index)
19015 parts.index = gen_lowpart (mode, parts.index);
19016 regno2 = true_regnum (parts.index);
19019 if (parts.disp)
19020 parts.disp = gen_lowpart (mode, parts.disp);
19022 if (parts.scale > 1)
19024 /* Case r1 = r1 + ... */
19025 if (regno1 == regno0)
19027 /* If we have the case r1 = r1 + C * r2 then we
19028 would have to use multiplication, which is very
19029 expensive. Assume the cost model is wrong if we
19030 get such a case here. */
19031 gcc_assert (regno2 != regno0);
19033 for (adds = parts.scale; adds > 0; adds--)
19034 ix86_emit_binop (PLUS, mode, target, parts.index);
19036 else
19038 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
19039 if (regno0 != regno2)
19040 emit_insn (gen_rtx_SET (target, parts.index));
19042 /* Use shift for scaling. */
19043 ix86_emit_binop (ASHIFT, mode, target,
19044 GEN_INT (exact_log2 (parts.scale)));
19046 if (parts.base)
19047 ix86_emit_binop (PLUS, mode, target, parts.base);
19049 if (parts.disp && parts.disp != const0_rtx)
19050 ix86_emit_binop (PLUS, mode, target, parts.disp);
19053 else if (!parts.base && !parts.index)
19055 gcc_assert (parts.disp);
19056 emit_insn (gen_rtx_SET (target, parts.disp));
19058 else
19060 if (!parts.base)
19062 if (regno0 != regno2)
19063 emit_insn (gen_rtx_SET (target, parts.index));
19065 else if (!parts.index)
19067 if (regno0 != regno1)
19068 emit_insn (gen_rtx_SET (target, parts.base));
19070 else
19072 if (regno0 == regno1)
19073 tmp = parts.index;
19074 else if (regno0 == regno2)
19075 tmp = parts.base;
19076 else
19078 rtx tmp1;
19080 /* Find better operand for SET instruction, depending
19081 on which definition is farther from the insn. */
19082 if (find_nearest_reg_def (insn, regno1, regno2))
19083 tmp = parts.index, tmp1 = parts.base;
19084 else
19085 tmp = parts.base, tmp1 = parts.index;
19087 emit_insn (gen_rtx_SET (target, tmp));
19089 if (parts.disp && parts.disp != const0_rtx)
19090 ix86_emit_binop (PLUS, mode, target, parts.disp);
19092 ix86_emit_binop (PLUS, mode, target, tmp1);
19093 return;
19096 ix86_emit_binop (PLUS, mode, target, tmp);
19099 if (parts.disp && parts.disp != const0_rtx)
19100 ix86_emit_binop (PLUS, mode, target, parts.disp);
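/* Rough illustration of the general case above: "lea 4(%rbx,%rcx,8), %rax"
   (base, index, scale 8, displacement, all registers distinct) is split
   into "mov %rcx, %rax; shl $3, %rax; add %rbx, %rax; add $4, %rax".  */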
19104 /* Return true if it is OK to optimize an ADD operation to a LEA
19105 operation to avoid flag register consumption. For most processors,
19106 ADD is faster than LEA. For processors like BONNELL, if the
19107 destination register of the LEA holds an actual address which will be
19108 used soon, LEA is better; otherwise ADD is better. */
19110 bool
19111 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19113 unsigned int regno0 = true_regnum (operands[0]);
19114 unsigned int regno1 = true_regnum (operands[1]);
19115 unsigned int regno2 = true_regnum (operands[2]);
19117 /* If a = b + c, (a!=b && a!=c), must use lea form. */
19118 if (regno0 != regno1 && regno0 != regno2)
19119 return true;
19121 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19122 return false;
19124 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19127 /* Return true if destination reg of SET_BODY is shift count of
19128 USE_BODY. */
19130 static bool
19131 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19133 rtx set_dest;
19134 rtx shift_rtx;
19135 int i;
19137 /* Retrieve destination of SET_BODY. */
19138 switch (GET_CODE (set_body))
19140 case SET:
19141 set_dest = SET_DEST (set_body);
19142 if (!set_dest || !REG_P (set_dest))
19143 return false;
19144 break;
19145 case PARALLEL:
19146 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19147 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19148 use_body))
19149 return true;
19150 default:
19151 return false;
19152 break;
19155 /* Retrieve shift count of USE_BODY. */
19156 switch (GET_CODE (use_body))
19158 case SET:
19159 shift_rtx = XEXP (use_body, 1);
19160 break;
19161 case PARALLEL:
19162 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19163 if (ix86_dep_by_shift_count_body (set_body,
19164 XVECEXP (use_body, 0, i)))
19165 return true;
19166 default:
19167 return false;
19168 break;
19171 if (shift_rtx
19172 && (GET_CODE (shift_rtx) == ASHIFT
19173 || GET_CODE (shift_rtx) == LSHIFTRT
19174 || GET_CODE (shift_rtx) == ASHIFTRT
19175 || GET_CODE (shift_rtx) == ROTATE
19176 || GET_CODE (shift_rtx) == ROTATERT))
19178 rtx shift_count = XEXP (shift_rtx, 1);
19180 /* Return true if shift count is dest of SET_BODY. */
19181 if (REG_P (shift_count))
19183 /* Add this check since this function can be invoked before register
19184 allocation by the pre-reload scheduler. */
19185 if (reload_completed
19186 && true_regnum (set_dest) == true_regnum (shift_count))
19187 return true;
19188 else if (REGNO (set_dest) == REGNO (shift_count))
19189 return true;
19193 return false;
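/* For example (hypothetical insns): if SET_BODY sets %ecx and USE_BODY
   performs "sall %cl, %eax", the shift count of USE_BODY is the
   destination of SET_BODY, so true is returned.  */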
19196 /* Return true if destination reg of SET_INSN is shift count of
19197 USE_INSN. */
19199 bool
19200 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19202 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19203 PATTERN (use_insn));
19206 /* Return TRUE or FALSE depending on whether the unary operator meets the
19207 appropriate constraints. */
19209 bool
19210 ix86_unary_operator_ok (enum rtx_code,
19211 machine_mode,
19212 rtx operands[2])
19214 /* If one of operands is memory, source and destination must match. */
19215 if ((MEM_P (operands[0])
19216 || MEM_P (operands[1]))
19217 && ! rtx_equal_p (operands[0], operands[1]))
19218 return false;
19219 return true;
19222 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19223 are ok, keeping in mind the possible movddup alternative. */
19225 bool
19226 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19228 if (MEM_P (operands[0]))
19229 return rtx_equal_p (operands[0], operands[1 + high]);
19230 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19231 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19232 return true;
19235 /* Post-reload splitter for converting an SF or DFmode value in an
19236 SSE register into an unsigned SImode value. */
19238 void
19239 ix86_split_convert_uns_si_sse (rtx operands[])
19241 machine_mode vecmode;
19242 rtx value, large, zero_or_two31, input, two31, x;
19244 large = operands[1];
19245 zero_or_two31 = operands[2];
19246 input = operands[3];
19247 two31 = operands[4];
19248 vecmode = GET_MODE (large);
19249 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19251 /* Load up the value into the low element. We must ensure that the other
19252 elements are valid floats -- zero is the easiest such value. */
19253 if (MEM_P (input))
19255 if (vecmode == V4SFmode)
19256 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19257 else
19258 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19260 else
19262 input = gen_rtx_REG (vecmode, REGNO (input));
19263 emit_move_insn (value, CONST0_RTX (vecmode));
19264 if (vecmode == V4SFmode)
19265 emit_insn (gen_sse_movss (value, value, input));
19266 else
19267 emit_insn (gen_sse2_movsd (value, value, input));
19270 emit_move_insn (large, two31);
19271 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19273 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19274 emit_insn (gen_rtx_SET (large, x));
19276 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19277 emit_insn (gen_rtx_SET (zero_or_two31, x));
19279 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19280 emit_insn (gen_rtx_SET (value, x));
19282 large = gen_rtx_REG (V4SImode, REGNO (large));
19283 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19285 x = gen_rtx_REG (V4SImode, REGNO (value));
19286 if (vecmode == V4SFmode)
19287 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19288 else
19289 emit_insn (gen_sse2_cvttpd2dq (x, value));
19290 value = x;
19292 emit_insn (gen_xorv4si3 (value, value, large));
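/* In effect, lanes below 0x1p31 are truncated directly, while lanes in
   [0x1p31, 0x1p32) are computed as (int) (x - 0x1p31) ^ 0x80000000;
   the comparison mask shifted left by 31 supplies the xor value.  */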
19295 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19296 Expects the 64-bit DImode to be supplied in a pair of integral
19297 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19298 -mfpmath=sse, !optimize_size only. */
19300 void
19301 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19303 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19304 rtx int_xmm, fp_xmm;
19305 rtx biases, exponents;
19306 rtx x;
19308 int_xmm = gen_reg_rtx (V4SImode);
19309 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19310 emit_insn (gen_movdi_to_sse (int_xmm, input));
19311 else if (TARGET_SSE_SPLIT_REGS)
19313 emit_clobber (int_xmm);
19314 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19316 else
19318 x = gen_reg_rtx (V2DImode);
19319 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19320 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19323 x = gen_rtx_CONST_VECTOR (V4SImode,
19324 gen_rtvec (4, GEN_INT (0x43300000UL),
19325 GEN_INT (0x45300000UL),
19326 const0_rtx, const0_rtx));
19327 exponents = validize_mem (force_const_mem (V4SImode, x));
19329 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19330 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19332 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19333 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19334 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19335 (0x1.0p84 + double(fp_value_hi_xmm)).
19336 Note these exponents differ by 32. */
19338 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19340 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19341 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19342 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19343 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19344 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19345 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19346 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19347 biases = validize_mem (force_const_mem (V2DFmode, biases));
19348 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19350 /* Add the upper and lower DFmode values together. */
19351 if (TARGET_SSE3)
19352 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19353 else
19355 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19356 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19357 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19360 ix86_expand_vector_extract (false, target, fp_xmm, 0);
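/* Worked example of the bias trick (illustrative): for the input
   0x0000000100000002 (hi == 1, lo == 2) the two doubles are
   0x1.0p52 + 2 and 0x1.0p84 + 1 * 0x1.0p32; after subtracting the
   biases and adding, the result is 0x1.0p32 + 2 exactly.  */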
19363 /* Not used, but eases macroization of patterns. */
19364 void
19365 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19367 gcc_unreachable ();
19370 /* Convert an unsigned SImode value into a DFmode. Only currently used
19371 for SSE, but applicable anywhere. */
19373 void
19374 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19376 REAL_VALUE_TYPE TWO31r;
19377 rtx x, fp;
19379 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19380 NULL, 1, OPTAB_DIRECT);
19382 fp = gen_reg_rtx (DFmode);
19383 emit_insn (gen_floatsidf2 (fp, x));
19385 real_ldexp (&TWO31r, &dconst1, 31);
19386 x = const_double_from_real_value (TWO31r, DFmode);
19388 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19389 if (x != target)
19390 emit_move_insn (target, x);
19393 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19394 32-bit mode; otherwise we have a direct convert instruction. */
19396 void
19397 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19399 REAL_VALUE_TYPE TWO32r;
19400 rtx fp_lo, fp_hi, x;
19402 fp_lo = gen_reg_rtx (DFmode);
19403 fp_hi = gen_reg_rtx (DFmode);
19405 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19407 real_ldexp (&TWO32r, &dconst1, 32);
19408 x = const_double_from_real_value (TWO32r, DFmode);
19409 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19411 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19413 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19414 0, OPTAB_DIRECT);
19415 if (x != target)
19416 emit_move_insn (target, x);
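/* I.e. the overall computation is
     (double) (int) hi * 0x1.0p32 + (double) (unsigned) lo,
   with the unsigned low part handled by ix86_expand_convert_uns_sidf_sse
   above.  */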
19419 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19420 For x86_32, -mfpmath=sse, !optimize_size only. */
19421 void
19422 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19424 REAL_VALUE_TYPE ONE16r;
19425 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19427 real_ldexp (&ONE16r, &dconst1, 16);
19428 x = const_double_from_real_value (ONE16r, SFmode);
19429 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19430 NULL, 0, OPTAB_DIRECT);
19431 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19432 NULL, 0, OPTAB_DIRECT);
19433 fp_hi = gen_reg_rtx (SFmode);
19434 fp_lo = gen_reg_rtx (SFmode);
19435 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19436 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19437 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19438 0, OPTAB_DIRECT);
19439 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19440 0, OPTAB_DIRECT);
19441 if (!rtx_equal_p (target, fp_hi))
19442 emit_move_insn (target, fp_hi);
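/* I.e. the result is computed as
     (float) (x >> 16) * 0x1.0p16 + (float) (x & 0xffff),
   keeping each signed SImode -> SFmode conversion within range.  */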
19445 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19446 a vector of unsigned ints VAL to a vector of floats TARGET. */
19448 void
19449 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19451 rtx tmp[8];
19452 REAL_VALUE_TYPE TWO16r;
19453 machine_mode intmode = GET_MODE (val);
19454 machine_mode fltmode = GET_MODE (target);
19455 rtx (*cvt) (rtx, rtx);
19457 if (intmode == V4SImode)
19458 cvt = gen_floatv4siv4sf2;
19459 else
19460 cvt = gen_floatv8siv8sf2;
19461 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19462 tmp[0] = force_reg (intmode, tmp[0]);
19463 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19464 OPTAB_DIRECT);
19465 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19466 NULL_RTX, 1, OPTAB_DIRECT);
19467 tmp[3] = gen_reg_rtx (fltmode);
19468 emit_insn (cvt (tmp[3], tmp[1]));
19469 tmp[4] = gen_reg_rtx (fltmode);
19470 emit_insn (cvt (tmp[4], tmp[2]));
19471 real_ldexp (&TWO16r, &dconst1, 16);
19472 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19473 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19474 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19475 OPTAB_DIRECT);
19476 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19477 OPTAB_DIRECT);
19478 if (tmp[7] != target)
19479 emit_move_insn (target, tmp[7]);
19482 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19483 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19484 This is done by doing just a signed conversion if < 0x1p31, and otherwise by
19485 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19487 rtx
19488 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19490 REAL_VALUE_TYPE TWO31r;
19491 rtx two31r, tmp[4];
19492 machine_mode mode = GET_MODE (val);
19493 machine_mode scalarmode = GET_MODE_INNER (mode);
19494 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19495 rtx (*cmp) (rtx, rtx, rtx, rtx);
19496 int i;
19498 for (i = 0; i < 3; i++)
19499 tmp[i] = gen_reg_rtx (mode);
19500 real_ldexp (&TWO31r, &dconst1, 31);
19501 two31r = const_double_from_real_value (TWO31r, scalarmode);
19502 two31r = ix86_build_const_vector (mode, 1, two31r);
19503 two31r = force_reg (mode, two31r);
19504 switch (mode)
19506 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19507 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19508 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19509 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19510 default: gcc_unreachable ();
19512 tmp[3] = gen_rtx_LE (mode, two31r, val);
19513 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19514 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19515 0, OPTAB_DIRECT);
19516 if (intmode == V4SImode || TARGET_AVX2)
19517 *xorp = expand_simple_binop (intmode, ASHIFT,
19518 gen_lowpart (intmode, tmp[0]),
19519 GEN_INT (31), NULL_RTX, 0,
19520 OPTAB_DIRECT);
19521 else
19523 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19524 two31 = ix86_build_const_vector (intmode, 1, two31);
19525 *xorp = expand_simple_binop (intmode, AND,
19526 gen_lowpart (intmode, tmp[0]),
19527 two31, NULL_RTX, 0,
19528 OPTAB_DIRECT);
19530 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19531 0, OPTAB_DIRECT);
19534 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19535 then replicate the value for all elements of the vector
19536 register. */
19538 rtx
19539 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19541 int i, n_elt;
19542 rtvec v;
19543 machine_mode scalar_mode;
19545 switch (mode)
19547 case V64QImode:
19548 case V32QImode:
19549 case V16QImode:
19550 case V32HImode:
19551 case V16HImode:
19552 case V8HImode:
19553 case V16SImode:
19554 case V8SImode:
19555 case V4SImode:
19556 case V8DImode:
19557 case V4DImode:
19558 case V2DImode:
19559 gcc_assert (vect);
19560 case V16SFmode:
19561 case V8SFmode:
19562 case V4SFmode:
19563 case V8DFmode:
19564 case V4DFmode:
19565 case V2DFmode:
19566 n_elt = GET_MODE_NUNITS (mode);
19567 v = rtvec_alloc (n_elt);
19568 scalar_mode = GET_MODE_INNER (mode);
19570 RTVEC_ELT (v, 0) = value;
19572 for (i = 1; i < n_elt; ++i)
19573 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19575 return gen_rtx_CONST_VECTOR (mode, v);
19577 default:
19578 gcc_unreachable ();
19582 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19583 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19584 for an SSE register. If VECT is true, then replicate the mask for
19585 all elements of the vector register. If INVERT is true, then create
19586 a mask excluding the sign bit. */
19588 rtx
19589 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19591 machine_mode vec_mode, imode;
19592 wide_int w;
19593 rtx mask, v;
19595 switch (mode)
19597 case V16SImode:
19598 case V16SFmode:
19599 case V8SImode:
19600 case V4SImode:
19601 case V8SFmode:
19602 case V4SFmode:
19603 vec_mode = mode;
19604 imode = SImode;
19605 break;
19607 case V8DImode:
19608 case V4DImode:
19609 case V2DImode:
19610 case V8DFmode:
19611 case V4DFmode:
19612 case V2DFmode:
19613 vec_mode = mode;
19614 imode = DImode;
19615 break;
19617 case TImode:
19618 case TFmode:
19619 vec_mode = VOIDmode;
19620 imode = TImode;
19621 break;
19623 default:
19624 gcc_unreachable ();
19627 machine_mode inner_mode = GET_MODE_INNER (mode);
19628 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
19629 GET_MODE_BITSIZE (inner_mode));
19630 if (invert)
19631 w = wi::bit_not (w);
19633 /* Force this value into the low part of a fp vector constant. */
19634 mask = immed_wide_int_const (w, imode);
19635 mask = gen_lowpart (inner_mode, mask);
19637 if (vec_mode == VOIDmode)
19638 return force_reg (inner_mode, mask);
19640 v = ix86_build_const_vector (vec_mode, vect, mask);
19641 return force_reg (vec_mode, v);
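/* For example, for V4SFmode with VECT true this yields, in effect, the
   constant { -0.0f, -0.0f, -0.0f, -0.0f } (bit pattern 0x80000000 in
   each lane); with INVERT true each lane instead holds 0x7fffffff.  */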
19644 /* Generate code for floating point ABS or NEG. */
19646 void
19647 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19648 rtx operands[])
19650 rtx mask, set, dst, src;
19651 bool use_sse = false;
19652 bool vector_mode = VECTOR_MODE_P (mode);
19653 machine_mode vmode = mode;
19655 if (vector_mode)
19656 use_sse = true;
19657 else if (mode == TFmode)
19658 use_sse = true;
19659 else if (TARGET_SSE_MATH)
19661 use_sse = SSE_FLOAT_MODE_P (mode);
19662 if (mode == SFmode)
19663 vmode = V4SFmode;
19664 else if (mode == DFmode)
19665 vmode = V2DFmode;
19668 /* NEG and ABS performed with SSE use bitwise mask operations.
19669 Create the appropriate mask now. */
19670 if (use_sse)
19671 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19672 else
19673 mask = NULL_RTX;
19675 dst = operands[0];
19676 src = operands[1];
19678 set = gen_rtx_fmt_e (code, mode, src);
19679 set = gen_rtx_SET (dst, set);
19681 if (mask)
19683 rtx use, clob;
19684 rtvec par;
19686 use = gen_rtx_USE (VOIDmode, mask);
19687 if (vector_mode)
19688 par = gen_rtvec (2, set, use);
19689 else
19691 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19692 par = gen_rtvec (3, set, use, clob);
19694 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19696 else
19697 emit_insn (set);
19700 /* Expand a copysign operation. Special case operand 0 being a constant. */
19702 void
19703 ix86_expand_copysign (rtx operands[])
19705 machine_mode mode, vmode;
19706 rtx dest, op0, op1, mask, nmask;
19708 dest = operands[0];
19709 op0 = operands[1];
19710 op1 = operands[2];
19712 mode = GET_MODE (dest);
19714 if (mode == SFmode)
19715 vmode = V4SFmode;
19716 else if (mode == DFmode)
19717 vmode = V2DFmode;
19718 else
19719 vmode = mode;
19721 if (CONST_DOUBLE_P (op0))
19723 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19725 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19726 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19728 if (mode == SFmode || mode == DFmode)
19730 if (op0 == CONST0_RTX (mode))
19731 op0 = CONST0_RTX (vmode);
19732 else
19734 rtx v = ix86_build_const_vector (vmode, false, op0);
19736 op0 = force_reg (vmode, v);
19739 else if (op0 != CONST0_RTX (mode))
19740 op0 = force_reg (mode, op0);
19742 mask = ix86_build_signbit_mask (vmode, 0, 0);
19744 if (mode == SFmode)
19745 copysign_insn = gen_copysignsf3_const;
19746 else if (mode == DFmode)
19747 copysign_insn = gen_copysigndf3_const;
19748 else
19749 copysign_insn = gen_copysigntf3_const;
19751 emit_insn (copysign_insn (dest, op0, op1, mask));
19753 else
19755 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19757 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19758 mask = ix86_build_signbit_mask (vmode, 0, 0);
19760 if (mode == SFmode)
19761 copysign_insn = gen_copysignsf3_var;
19762 else if (mode == DFmode)
19763 copysign_insn = gen_copysigndf3_var;
19764 else
19765 copysign_insn = gen_copysigntf3_var;
19767 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19771 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19772 be a constant, and so has already been expanded into a vector constant. */
19774 void
19775 ix86_split_copysign_const (rtx operands[])
19777 machine_mode mode, vmode;
19778 rtx dest, op0, mask, x;
19780 dest = operands[0];
19781 op0 = operands[1];
19782 mask = operands[3];
19784 mode = GET_MODE (dest);
19785 vmode = GET_MODE (mask);
19787 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19788 x = gen_rtx_AND (vmode, dest, mask);
19789 emit_insn (gen_rtx_SET (dest, x));
19791 if (op0 != CONST0_RTX (vmode))
19793 x = gen_rtx_IOR (vmode, dest, op0);
19794 emit_insn (gen_rtx_SET (dest, x));
19798 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19799 so we have to do two masks. */
19801 void
19802 ix86_split_copysign_var (rtx operands[])
19804 machine_mode mode, vmode;
19805 rtx dest, scratch, op0, op1, mask, nmask, x;
19807 dest = operands[0];
19808 scratch = operands[1];
19809 op0 = operands[2];
19810 op1 = operands[3];
19811 nmask = operands[4];
19812 mask = operands[5];
19814 mode = GET_MODE (dest);
19815 vmode = GET_MODE (mask);
19817 if (rtx_equal_p (op0, op1))
19819 /* Shouldn't happen often (it's useless, obviously), but when it does
19820 we'd generate incorrect code if we continue below. */
19821 emit_move_insn (dest, op0);
19822 return;
19825 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19827 gcc_assert (REGNO (op1) == REGNO (scratch));
19829 x = gen_rtx_AND (vmode, scratch, mask);
19830 emit_insn (gen_rtx_SET (scratch, x));
19832 dest = mask;
19833 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19834 x = gen_rtx_NOT (vmode, dest);
19835 x = gen_rtx_AND (vmode, x, op0);
19836 emit_insn (gen_rtx_SET (dest, x));
19838 else
19840 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19842 x = gen_rtx_AND (vmode, scratch, mask);
19844 else /* alternative 2,4 */
19846 gcc_assert (REGNO (mask) == REGNO (scratch));
19847 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19848 x = gen_rtx_AND (vmode, scratch, op1);
19850 emit_insn (gen_rtx_SET (scratch, x));
19852 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19854 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19855 x = gen_rtx_AND (vmode, dest, nmask);
19857 else /* alternative 3,4 */
19859 gcc_assert (REGNO (nmask) == REGNO (dest));
19860 dest = nmask;
19861 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19862 x = gen_rtx_AND (vmode, dest, op0);
19864 emit_insn (gen_rtx_SET (dest, x));
19867 x = gen_rtx_IOR (vmode, dest, scratch);
19868 emit_insn (gen_rtx_SET (dest, x));
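/* In all alternatives the net effect is
     dest = (op0 & nmask) | (op1 & mask),
   i.e. the magnitude of op0 combined with the sign of op1.  */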
19871 /* Return TRUE or FALSE depending on whether the first SET in INSN
19872 has source and destination with matching CC modes, and that the
19873 CC mode is at least as constrained as REQ_MODE. */
19875 bool
19876 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19878 rtx set;
19879 machine_mode set_mode;
19881 set = PATTERN (insn);
19882 if (GET_CODE (set) == PARALLEL)
19883 set = XVECEXP (set, 0, 0);
19884 gcc_assert (GET_CODE (set) == SET);
19885 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19887 set_mode = GET_MODE (SET_DEST (set));
19888 switch (set_mode)
19890 case CCNOmode:
19891 if (req_mode != CCNOmode
19892 && (req_mode != CCmode
19893 || XEXP (SET_SRC (set), 1) != const0_rtx))
19894 return false;
19895 break;
19896 case CCmode:
19897 if (req_mode == CCGCmode)
19898 return false;
19899 /* FALLTHRU */
19900 case CCGCmode:
19901 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19902 return false;
19903 /* FALLTHRU */
19904 case CCGOCmode:
19905 if (req_mode == CCZmode)
19906 return false;
19907 /* FALLTHRU */
19908 case CCZmode:
19909 break;
19911 case CCAmode:
19912 case CCCmode:
19913 case CCOmode:
19914 case CCPmode:
19915 case CCSmode:
19916 if (set_mode != req_mode)
19917 return false;
19918 break;
19920 default:
19921 gcc_unreachable ();
19924 return GET_MODE (SET_SRC (set)) == set_mode;
19927 /* Generate insn patterns to do an integer compare of OPERANDS. */
19929 static rtx
19930 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19932 machine_mode cmpmode;
19933 rtx tmp, flags;
19935 cmpmode = SELECT_CC_MODE (code, op0, op1);
19936 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19938 /* This is very simple, but making the interface the same as in the
19939 FP case makes the rest of the code easier. */
19940 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19941 emit_insn (gen_rtx_SET (flags, tmp));
19943 /* Return the test that should be put into the flags user, i.e.
19944 the bcc, scc, or cmov instruction. */
19945 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19948 /* Figure out whether to use ordered or unordered fp comparisons.
19949 Return the appropriate mode to use. */
19951 machine_mode
19952 ix86_fp_compare_mode (enum rtx_code)
19954 /* ??? In order to make all comparisons reversible, we do all comparisons
19955 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19956 all forms of trapping and nontrapping comparisons, we can make inequality
19957 comparisons trapping again, since it results in better code when using
19958 FCOM based compares. */
19959 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19962 machine_mode
19963 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19965 machine_mode mode = GET_MODE (op0);
19967 if (SCALAR_FLOAT_MODE_P (mode))
19969 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19970 return ix86_fp_compare_mode (code);
19973 switch (code)
19975 /* Only zero flag is needed. */
19976 case EQ: /* ZF=0 */
19977 case NE: /* ZF!=0 */
19978 return CCZmode;
19979 /* Codes needing carry flag. */
19980 case GEU: /* CF=0 */
19981 case LTU: /* CF=1 */
19982 /* Detect overflow checks. They need just the carry flag. */
19983 if (GET_CODE (op0) == PLUS
19984 && rtx_equal_p (op1, XEXP (op0, 0)))
19985 return CCCmode;
19986 else
19987 return CCmode;
19988 case GTU: /* CF=0 & ZF=0 */
19989 case LEU: /* CF=1 | ZF=1 */
19990 return CCmode;
19991 /* Codes possibly doable only with the sign flag when
19992 comparing against zero. */
19993 case GE: /* SF=OF or SF=0 */
19994 case LT: /* SF<>OF or SF=1 */
19995 if (op1 == const0_rtx)
19996 return CCGOCmode;
19997 else
19998 /* For other cases the carry flag is not required. */
19999 return CCGCmode;
20000 /* Codes doable only with the sign flag when comparing
20001 against zero, but we lack a jump instruction for them,
20002 so we need to use relational tests against the overflow
20003 flag, which thus needs to be zero. */
20004 case GT: /* ZF=0 & SF=OF */
20005 case LE: /* ZF=1 | SF<>OF */
20006 if (op1 == const0_rtx)
20007 return CCNOmode;
20008 else
20009 return CCGCmode;
20010 /* The strcmp pattern does (use flags), and combine may ask us for the
20011 proper mode. */
20012 case USE:
20013 return CCmode;
20014 default:
20015 gcc_unreachable ();
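/* For example (hypothetical insns): "cmp $0, %eax; jl" needs only the
   sign and overflow flags and gets CCGOCmode, while an overflow check
   such as "add %esi, %edi; jc" is recognized via the PLUS test above
   and gets CCCmode.  */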
20019 /* Return the fixed registers used for condition codes. */
20021 static bool
20022 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
20024 *p1 = FLAGS_REG;
20025 *p2 = FPSR_REG;
20026 return true;
20029 /* If two condition code modes are compatible, return a condition code
20030 mode which is compatible with both. Otherwise, return
20031 VOIDmode. */
20033 static machine_mode
20034 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
20036 if (m1 == m2)
20037 return m1;
20039 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20040 return VOIDmode;
20042 if ((m1 == CCGCmode && m2 == CCGOCmode)
20043 || (m1 == CCGOCmode && m2 == CCGCmode))
20044 return CCGCmode;
20046 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20047 return m2;
20048 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20049 return m1;
20051 switch (m1)
20053 default:
20054 gcc_unreachable ();
20056 case CCmode:
20057 case CCGCmode:
20058 case CCGOCmode:
20059 case CCNOmode:
20060 case CCAmode:
20061 case CCCmode:
20062 case CCOmode:
20063 case CCPmode:
20064 case CCSmode:
20065 case CCZmode:
20066 switch (m2)
20068 default:
20069 return VOIDmode;
20071 case CCmode:
20072 case CCGCmode:
20073 case CCGOCmode:
20074 case CCNOmode:
20075 case CCAmode:
20076 case CCCmode:
20077 case CCOmode:
20078 case CCPmode:
20079 case CCSmode:
20080 case CCZmode:
20081 return CCmode;
20084 case CCFPmode:
20085 case CCFPUmode:
20086 /* These are only compatible with themselves, which we already
20087 checked above. */
20088 return VOIDmode;
20093 /* Return a comparison we can do that is equivalent to
20094 swap_condition (code), apart possibly from orderedness.
20095 Never change orderedness if TARGET_IEEE_FP, returning
20096 UNKNOWN in that case if necessary. */
20098 static enum rtx_code
20099 ix86_fp_swap_condition (enum rtx_code code)
20101 switch (code)
20103 case GT: /* GTU - CF=0 & ZF=0 */
20104 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20105 case GE: /* GEU - CF=0 */
20106 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20107 case UNLT: /* LTU - CF=1 */
20108 return TARGET_IEEE_FP ? UNKNOWN : GT;
20109 case UNLE: /* LEU - CF=1 | ZF=1 */
20110 return TARGET_IEEE_FP ? UNKNOWN : GE;
20111 default:
20112 return swap_condition (code);
20116 /* Return the cost of comparison CODE using the best strategy for performance.
20117 All following functions use the number of instructions as the cost metric.
20118 In the future this should be tweaked to compute bytes for optimize_size and
20119 take into account the performance of various instructions on various CPUs. */
20121 static int
20122 ix86_fp_comparison_cost (enum rtx_code code)
20124 int arith_cost;
20126 /* The cost of code using bit-twiddling on %ah. */
20127 switch (code)
20129 case UNLE:
20130 case UNLT:
20131 case LTGT:
20132 case GT:
20133 case GE:
20134 case UNORDERED:
20135 case ORDERED:
20136 case UNEQ:
20137 arith_cost = 4;
20138 break;
20139 case LT:
20140 case NE:
20141 case EQ:
20142 case UNGE:
20143 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20144 break;
20145 case LE:
20146 case UNGT:
20147 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20148 break;
20149 default:
20150 gcc_unreachable ();
20153 switch (ix86_fp_comparison_strategy (code))
20155 case IX86_FPCMP_COMI:
20156 return arith_cost > 4 ? 3 : 2;
20157 case IX86_FPCMP_SAHF:
20158 return arith_cost > 4 ? 4 : 3;
20159 default:
20160 return arith_cost;
20164 /* Return the strategy to use for a floating-point comparison. We assume that
20165 fcomi is always preferable where available, since that is also true when
20166 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20168 enum ix86_fpcmp_strategy
20169 ix86_fp_comparison_strategy (enum rtx_code)
20171 /* Do fcomi/sahf based test when profitable. */
20173 if (TARGET_CMOVE)
20174 return IX86_FPCMP_COMI;
20176 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20177 return IX86_FPCMP_SAHF;
20179 return IX86_FPCMP_ARITH;
20182 /* Swap, force into registers, or otherwise massage the two operands
20183 to a fp comparison. The operands are updated in place; the new
20184 comparison code is returned. */
20186 static enum rtx_code
20187 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20189 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20190 rtx op0 = *pop0, op1 = *pop1;
20191 machine_mode op_mode = GET_MODE (op0);
20192 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20194 /* All of the unordered compare instructions only work on registers.
20195 The same is true of the fcomi compare instructions. The XFmode
20196 compare instructions require registers except when comparing
20197 against zero or when converting operand 1 from fixed point to
20198 floating point. */
20200 if (!is_sse
20201 && (fpcmp_mode == CCFPUmode
20202 || (op_mode == XFmode
20203 && ! (standard_80387_constant_p (op0) == 1
20204 || standard_80387_constant_p (op1) == 1)
20205 && GET_CODE (op1) != FLOAT)
20206 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20208 op0 = force_reg (op_mode, op0);
20209 op1 = force_reg (op_mode, op1);
20211 else
20213 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20214 things around if they appear profitable, otherwise force op0
20215 into a register. */
20217 if (standard_80387_constant_p (op0) == 0
20218 || (MEM_P (op0)
20219 && ! (standard_80387_constant_p (op1) == 0
20220 || MEM_P (op1))))
20222 enum rtx_code new_code = ix86_fp_swap_condition (code);
20223 if (new_code != UNKNOWN)
20225 std::swap (op0, op1);
20226 code = new_code;
20230 if (!REG_P (op0))
20231 op0 = force_reg (op_mode, op0);
20233 if (CONSTANT_P (op1))
20235 int tmp = standard_80387_constant_p (op1);
20236 if (tmp == 0)
20237 op1 = validize_mem (force_const_mem (op_mode, op1));
20238 else if (tmp == 1)
20240 if (TARGET_CMOVE)
20241 op1 = force_reg (op_mode, op1);
20243 else
20244 op1 = force_reg (op_mode, op1);
20248 /* Try to rearrange the comparison to make it cheaper. */
20249 if (ix86_fp_comparison_cost (code)
20250 > ix86_fp_comparison_cost (swap_condition (code))
20251 && (REG_P (op1) || can_create_pseudo_p ()))
20253 std::swap (op0, op1);
20254 code = swap_condition (code);
20255 if (!REG_P (op0))
20256 op0 = force_reg (op_mode, op0);
20259 *pop0 = op0;
20260 *pop1 = op1;
20261 return code;
20264 /* Convert comparison codes we use to represent FP comparison to integer
20265 code that will result in proper branch. Return UNKNOWN if no such code
20266 is available. */
20268 enum rtx_code
20269 ix86_fp_compare_code_to_integer (enum rtx_code code)
20271 switch (code)
20273 case GT:
20274 return GTU;
20275 case GE:
20276 return GEU;
20277 case ORDERED:
20278 case UNORDERED:
20279 return code;
20280 break;
20281 case UNEQ:
20282 return EQ;
20283 break;
20284 case UNLT:
20285 return LTU;
20286 break;
20287 case UNLE:
20288 return LEU;
20289 break;
20290 case LTGT:
20291 return NE;
20292 break;
20293 default:
20294 return UNKNOWN;
20298 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20300 static rtx
20301 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20303 machine_mode fpcmp_mode, intcmp_mode;
20304 rtx tmp, tmp2;
20306 fpcmp_mode = ix86_fp_compare_mode (code);
20307 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20309 /* Do fcomi/sahf based test when profitable. */
20310 switch (ix86_fp_comparison_strategy (code))
20312 case IX86_FPCMP_COMI:
20313 intcmp_mode = fpcmp_mode;
20314 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20315 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20316 emit_insn (tmp);
20317 break;
20319 case IX86_FPCMP_SAHF:
20320 intcmp_mode = fpcmp_mode;
20321 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20322 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20324 if (!scratch)
20325 scratch = gen_reg_rtx (HImode);
20326 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20327 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20328 break;
20330 case IX86_FPCMP_ARITH:
20331 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20332 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20333 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20334 if (!scratch)
20335 scratch = gen_reg_rtx (HImode);
20336 emit_insn (gen_rtx_SET (scratch, tmp2));
20338 /* In the unordered case, we have to check C2 for NaNs, which
20339 doesn't happen to work out to anything nice combination-wise.
20340 So do some bit twiddling on the value we've got in AH to come
20341 up with an appropriate set of condition codes. */
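/* After fnstsw, AH holds the x87 status word's condition bits: C0 at
   0x01, C2 at 0x04 and C3 at 0x40, so a mask such as 0x45 tests
   C3|C2|C0 at once; that is where the constants below come from.  */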
20343 intcmp_mode = CCNOmode;
20344 switch (code)
20346 case GT:
20347 case UNGT:
20348 if (code == GT || !TARGET_IEEE_FP)
20350 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20351 code = EQ;
20353 else
20355 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20356 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20357 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20358 intcmp_mode = CCmode;
20359 code = GEU;
20361 break;
20362 case LT:
20363 case UNLT:
20364 if (code == LT && TARGET_IEEE_FP)
20366 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20367 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20368 intcmp_mode = CCmode;
20369 code = EQ;
20371 else
20373 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20374 code = NE;
20376 break;
20377 case GE:
20378 case UNGE:
20379 if (code == GE || !TARGET_IEEE_FP)
20381 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20382 code = EQ;
20384 else
20386 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20387 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20388 code = NE;
20390 break;
20391 case LE:
20392 case UNLE:
20393 if (code == LE && TARGET_IEEE_FP)
20395 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20396 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20397 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20398 intcmp_mode = CCmode;
20399 code = LTU;
20401 else
20403 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20404 code = NE;
20406 break;
20407 case EQ:
20408 case UNEQ:
20409 if (code == EQ && TARGET_IEEE_FP)
20411 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20412 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20413 intcmp_mode = CCmode;
20414 code = EQ;
20416 else
20418 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20419 code = NE;
20421 break;
20422 case NE:
20423 case LTGT:
20424 if (code == NE && TARGET_IEEE_FP)
20426 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20427 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20428 GEN_INT (0x40)));
20429 code = NE;
20431 else
20433 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20434 code = EQ;
20436 break;
20438 case UNORDERED:
20439 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20440 code = NE;
20441 break;
20442 case ORDERED:
20443 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20444 code = EQ;
20445 break;
20447 default:
20448 gcc_unreachable ();
20450 break;
20452 default:
20453 gcc_unreachable ();
20456 /* Return the test that should be put into the flags user, i.e.
20457 the bcc, scc, or cmov instruction. */
20458 return gen_rtx_fmt_ee (code, VOIDmode,
20459 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20460 const0_rtx);
20463 static rtx
20464 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20466 rtx ret;
20468 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20469 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20471 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20473 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20474 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20476 else
20477 ret = ix86_expand_int_compare (code, op0, op1);
20479 return ret;
20482 void
20483 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20485 machine_mode mode = GET_MODE (op0);
20486 rtx tmp;
20488 switch (mode)
20490 case SFmode:
20491 case DFmode:
20492 case XFmode:
20493 case QImode:
20494 case HImode:
20495 case SImode:
20496 simple:
20497 tmp = ix86_expand_compare (code, op0, op1);
20498 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20499 gen_rtx_LABEL_REF (VOIDmode, label),
20500 pc_rtx);
20501 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
20502 return;
20504 case DImode:
20505 if (TARGET_64BIT)
20506 goto simple;
20507 case TImode:
20508 /* Expand DImode branch into multiple compare+branch. */
20510 rtx lo[2], hi[2];
20511 rtx_code_label *label2;
20512 enum rtx_code code1, code2, code3;
20513 machine_mode submode;
20515 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20517 std::swap (op0, op1);
20518 code = swap_condition (code);
20521 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20522 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20524 submode = mode == DImode ? SImode : DImode;
20526 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20527 avoid two branches. This costs one extra insn, so disable when
20528 optimizing for size. */
20530 if ((code == EQ || code == NE)
20531 && (!optimize_insn_for_size_p ()
20532 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20534 rtx xor0, xor1;
20536 xor1 = hi[0];
20537 if (hi[1] != const0_rtx)
20538 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20539 NULL_RTX, 0, OPTAB_WIDEN);
20541 xor0 = lo[0];
20542 if (lo[1] != const0_rtx)
20543 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20544 NULL_RTX, 0, OPTAB_WIDEN);
20546 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20547 NULL_RTX, 0, OPTAB_WIDEN);
20549 ix86_expand_branch (code, tmp, const0_rtx, label);
20550 return;
20553 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20554 op1 is a constant and the low word is zero, then we can just
20555 examine the high word. Similarly for low word -1 and
20556 less-or-equal-than or greater-than. */
20558 if (CONST_INT_P (hi[1]))
20559 switch (code)
20561 case LT: case LTU: case GE: case GEU:
20562 if (lo[1] == const0_rtx)
20564 ix86_expand_branch (code, hi[0], hi[1], label);
20565 return;
20567 break;
20568 case LE: case LEU: case GT: case GTU:
20569 if (lo[1] == constm1_rtx)
20571 ix86_expand_branch (code, hi[0], hi[1], label);
20572 return;
20574 break;
20575 default:
20576 break;
20579 /* Otherwise, we need two or three jumps. */
20581 label2 = gen_label_rtx ();
20583 code1 = code;
20584 code2 = swap_condition (code);
20585 code3 = unsigned_condition (code);
20587 switch (code)
20589 case LT: case GT: case LTU: case GTU:
20590 break;
20592 case LE: code1 = LT; code2 = GT; break;
20593 case GE: code1 = GT; code2 = LT; break;
20594 case LEU: code1 = LTU; code2 = GTU; break;
20595 case GEU: code1 = GTU; code2 = LTU; break;
20597 case EQ: code1 = UNKNOWN; code2 = NE; break;
20598 case NE: code2 = UNKNOWN; break;
20600 default:
20601 gcc_unreachable ();
20605 * a < b =>
20606 * if (hi(a) < hi(b)) goto true;
20607 * if (hi(a) > hi(b)) goto false;
20608 * if (lo(a) < lo(b)) goto true;
20609 * false:
20612 if (code1 != UNKNOWN)
20613 ix86_expand_branch (code1, hi[0], hi[1], label);
20614 if (code2 != UNKNOWN)
20615 ix86_expand_branch (code2, hi[0], hi[1], label2);
20617 ix86_expand_branch (code3, lo[0], lo[1], label);
20619 if (code2 != UNKNOWN)
20620 emit_label (label2);
20621 return;
20624 default:
20625 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20626 goto simple;
20630 /* Split branch based on floating point condition. */
20631 void
20632 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20633 rtx target1, rtx target2, rtx tmp)
20635 rtx condition;
20636 rtx i;
20638 if (target2 != pc_rtx)
20640 std::swap (target1, target2);
20641 code = reverse_condition_maybe_unordered (code);
20644 condition = ix86_expand_fp_compare (code, op1, op2,
20645 tmp);
20647 i = emit_jump_insn (gen_rtx_SET
20648 (pc_rtx,
20649 gen_rtx_IF_THEN_ELSE (VOIDmode,
20650 condition, target1, target2)));
20651 if (split_branch_probability >= 0)
20652 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20655 void
20656 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20658 rtx ret;
20660 gcc_assert (GET_MODE (dest) == QImode);
20662 ret = ix86_expand_compare (code, op0, op1);
20663 PUT_MODE (ret, QImode);
20664 emit_insn (gen_rtx_SET (dest, ret));
20667 /* Expand a comparison setting or clearing the carry flag. Return true
20668 when successful and set *POP to the comparison operation. */
20669 static bool
20670 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20672 machine_mode mode =
20673 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20675 /* Do not handle double-mode compares that go through the special path. */
20676 if (mode == (TARGET_64BIT ? TImode : DImode))
20677 return false;
20679 if (SCALAR_FLOAT_MODE_P (mode))
20681 rtx compare_op;
20682 rtx_insn *compare_seq;
20684 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20686 /* Shortcut: the following common codes never translate
20687 into carry flag compares. */
20688 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20689 || code == ORDERED || code == UNORDERED)
20690 return false;
20692 /* These comparisons require the zero flag; swap the operands so they do not. */
20693 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20694 && !TARGET_IEEE_FP)
20696 std::swap (op0, op1);
20697 code = swap_condition (code);
20700 /* Try to expand the comparison and verify that we end up with
20701 a carry flag based comparison. This fails only when we decide
20702 to expand the comparison using arithmetic, which is not a
20703 common scenario. */
20704 start_sequence ();
20705 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20706 compare_seq = get_insns ();
20707 end_sequence ();
20709 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20710 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20711 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20712 else
20713 code = GET_CODE (compare_op);
20715 if (code != LTU && code != GEU)
20716 return false;
20718 emit_insn (compare_seq);
20719 *pop = compare_op;
20720 return true;
20723 if (!INTEGRAL_MODE_P (mode))
20724 return false;
20726 switch (code)
20728 case LTU:
20729 case GEU:
20730 break;
20732 /* Convert a==0 into (unsigned)a<1. */
20733 case EQ:
20734 case NE:
20735 if (op1 != const0_rtx)
20736 return false;
20737 op1 = const1_rtx;
20738 code = (code == EQ ? LTU : GEU);
20739 break;
20741 /* Convert a>b into b<a or a>=b+1. */
20742 case GTU:
20743 case LEU:
20744 if (CONST_INT_P (op1))
20746 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20747 /* Bail out on overflow. We still can swap operands but that
20748 would force loading of the constant into register. */
20749 if (op1 == const0_rtx
20750 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20751 return false;
20752 code = (code == GTU ? GEU : LTU);
20754 else
20756 std::swap (op0, op1);
20757 code = (code == GTU ? LTU : GEU);
20759 break;
20761 /* Convert a>=0 into (unsigned)a<0x80000000. */
20762 case LT:
20763 case GE:
20764 if (mode == DImode || op1 != const0_rtx)
20765 return false;
20766 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20767 code = (code == LT ? GEU : LTU);
20768 break;
20769 case LE:
20770 case GT:
20771 if (mode == DImode || op1 != constm1_rtx)
20772 return false;
20773 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20774 code = (code == LE ? GEU : LTU);
20775 break;
20777 default:
20778 return false;
20780 /* Swapping operands may cause the constant to appear as the first operand. */
20781 if (!nonimmediate_operand (op0, VOIDmode))
20783 if (!can_create_pseudo_p ())
20784 return false;
20785 op0 = force_reg (mode, op0);
20787 *pop = ix86_expand_compare (code, op0, op1);
20788 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20789 return true;
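/* Illustrative summary (sketch, not authoritative): the integer rewrites
   above all normalize a comparison so its truth value lands in the carry
   flag:

     a == 0   ->  (unsigned) a <  1
     a >u b   ->  (unsigned) a >= b + 1        (b a constant)
     a >= 0   ->  (unsigned) a <  0x80000000

   Every right-hand side is an LTU/GEU test, which a single cmp
   instruction answers through the carry flag.  */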
20792 bool
20793 ix86_expand_int_movcc (rtx operands[])
20795 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20796 rtx_insn *compare_seq;
20797 rtx compare_op;
20798 machine_mode mode = GET_MODE (operands[0]);
20799 bool sign_bit_compare_p = false;
20800 rtx op0 = XEXP (operands[1], 0);
20801 rtx op1 = XEXP (operands[1], 1);
20803 if (GET_MODE (op0) == TImode
20804 || (GET_MODE (op0) == DImode
20805 && !TARGET_64BIT))
20806 return false;
20808 start_sequence ();
20809 compare_op = ix86_expand_compare (code, op0, op1);
20810 compare_seq = get_insns ();
20811 end_sequence ();
20813 compare_code = GET_CODE (compare_op);
20815 if ((op1 == const0_rtx && (code == GE || code == LT))
20816 || (op1 == constm1_rtx && (code == GT || code == LE)))
20817 sign_bit_compare_p = true;
20819 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20820 HImode insns, we'd be swallowed in word prefix ops. */
20822 if ((mode != HImode || TARGET_FAST_PREFIX)
20823 && (mode != (TARGET_64BIT ? TImode : DImode))
20824 && CONST_INT_P (operands[2])
20825 && CONST_INT_P (operands[3]))
20827 rtx out = operands[0];
20828 HOST_WIDE_INT ct = INTVAL (operands[2]);
20829 HOST_WIDE_INT cf = INTVAL (operands[3]);
20830 HOST_WIDE_INT diff;
20832 diff = ct - cf;
20833 /* Sign bit compares are better done using shifts than by using
20834 sbb. */
20835 if (sign_bit_compare_p
20836 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20838 /* Detect overlap between destination and compare sources. */
20839 rtx tmp = out;
20841 if (!sign_bit_compare_p)
20843 rtx flags;
20844 bool fpcmp = false;
20846 compare_code = GET_CODE (compare_op);
20848 flags = XEXP (compare_op, 0);
20850 if (GET_MODE (flags) == CCFPmode
20851 || GET_MODE (flags) == CCFPUmode)
20853 fpcmp = true;
20854 compare_code
20855 = ix86_fp_compare_code_to_integer (compare_code);
20858 /* To simplify the rest of the code, restrict to the GEU case. */
20859 if (compare_code == LTU)
20861 std::swap (ct, cf);
20862 compare_code = reverse_condition (compare_code);
20863 code = reverse_condition (code);
20865 else
20867 if (fpcmp)
20868 PUT_CODE (compare_op,
20869 reverse_condition_maybe_unordered
20870 (GET_CODE (compare_op)));
20871 else
20872 PUT_CODE (compare_op,
20873 reverse_condition (GET_CODE (compare_op)));
20875 diff = ct - cf;
20877 if (reg_overlap_mentioned_p (out, op0)
20878 || reg_overlap_mentioned_p (out, op1))
20879 tmp = gen_reg_rtx (mode);
20881 if (mode == DImode)
20882 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20883 else
20884 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20885 flags, compare_op));
20887 else
20889 if (code == GT || code == GE)
20890 code = reverse_condition (code);
20891 else
20893 std::swap (ct, cf);
20894 diff = ct - cf;
20896 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20899 if (diff == 1)
20902 * cmpl op0,op1
20903 * sbbl dest,dest
20904 * [addl dest, ct]
20906 * Size 5 - 8.
20908 if (ct)
20909 tmp = expand_simple_binop (mode, PLUS,
20910 tmp, GEN_INT (ct),
20911 copy_rtx (tmp), 1, OPTAB_DIRECT);
20913 else if (cf == -1)
20916 * cmpl op0,op1
20917 * sbbl dest,dest
20918 * orl $ct, dest
20920 * Size 8.
20922 tmp = expand_simple_binop (mode, IOR,
20923 tmp, GEN_INT (ct),
20924 copy_rtx (tmp), 1, OPTAB_DIRECT);
20926 else if (diff == -1 && ct)
20929 * cmpl op0,op1
20930 * sbbl dest,dest
20931 * notl dest
20932 * [addl dest, cf]
20934 * Size 8 - 11.
20936 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20937 if (cf)
20938 tmp = expand_simple_binop (mode, PLUS,
20939 copy_rtx (tmp), GEN_INT (cf),
20940 copy_rtx (tmp), 1, OPTAB_DIRECT);
20942 else
20945 * cmpl op0,op1
20946 * sbbl dest,dest
20947 * [notl dest]
20948 * andl cf - ct, dest
20949 * [addl dest, ct]
20951 * Size 8 - 11.
20954 if (cf == 0)
20956 cf = ct;
20957 ct = 0;
20958 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20961 tmp = expand_simple_binop (mode, AND,
20962 copy_rtx (tmp),
20963 gen_int_mode (cf - ct, mode),
20964 copy_rtx (tmp), 1, OPTAB_DIRECT);
20965 if (ct)
20966 tmp = expand_simple_binop (mode, PLUS,
20967 copy_rtx (tmp), GEN_INT (ct),
20968 copy_rtx (tmp), 1, OPTAB_DIRECT);
20971 if (!rtx_equal_p (tmp, out))
20972 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20974 return true;
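/* Illustrative note (sketch only): after "cmp op0, op1", "sbb dest, dest"
   leaves dest = (op0 <u op1) ? -1 : 0.  From that all-ones/all-zeros mask
   the cases above derive

     dest = (mask & (cf - ct)) + ct      -- ct when mask == 0, cf when -1

   using plain NOT/AND/OR/ADD, so no branch is ever emitted.  */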
20977 if (diff < 0)
20979 machine_mode cmp_mode = GET_MODE (op0);
20980 enum rtx_code new_code;
20982 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20984 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20986 /* We may be reversing an unordered compare to a normal compare, which
20987 is not valid in general (we may convert a non-trapping condition
20988 to a trapping one); however, on i386 we currently emit all
20989 comparisons unordered. */
20990 new_code = reverse_condition_maybe_unordered (code);
20992 else
20993 new_code = ix86_reverse_condition (code, cmp_mode);
20994 if (new_code != UNKNOWN)
20996 std::swap (ct, cf);
20997 diff = -diff;
20998 code = new_code;
21002 compare_code = UNKNOWN;
21003 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
21004 && CONST_INT_P (op1))
21006 if (op1 == const0_rtx
21007 && (code == LT || code == GE))
21008 compare_code = code;
21009 else if (op1 == constm1_rtx)
21011 if (code == LE)
21012 compare_code = LT;
21013 else if (code == GT)
21014 compare_code = GE;
21018 /* Optimize dest = (op0 < 0) ? -1 : cf. */
21019 if (compare_code != UNKNOWN
21020 && GET_MODE (op0) == GET_MODE (out)
21021 && (cf == -1 || ct == -1))
21023 /* If the lea code below could be used, only optimize
21024 if it results in a 2-insn sequence. */
21026 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
21027 || diff == 3 || diff == 5 || diff == 9)
21028 || (compare_code == LT && ct == -1)
21029 || (compare_code == GE && cf == -1))
21032 * notl op1 (if necessary)
21033 * sarl $31, op1
21034 * orl cf, op1
21036 if (ct != -1)
21038 cf = ct;
21039 ct = -1;
21040 code = reverse_condition (code);
21043 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21045 out = expand_simple_binop (mode, IOR,
21046 out, GEN_INT (cf),
21047 out, 1, OPTAB_DIRECT);
21048 if (out != operands[0])
21049 emit_move_insn (operands[0], out);
21051 return true;
21056 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21057 || diff == 3 || diff == 5 || diff == 9)
21058 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21059 && (mode != DImode
21060 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21063 * xorl dest,dest
21064 * cmpl op1,op2
21065 * setcc dest
21066 * lea cf(dest*(ct-cf)),dest
21068 * Size 14.
21070 * This also catches the degenerate setcc-only case.
21073 rtx tmp;
21074 int nops;
21076 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21078 nops = 0;
21079 /* On x86_64 the lea instruction operates on Pmode, so we need
21080 to get the arithmetic done in the proper mode to match. */
21081 if (diff == 1)
21082 tmp = copy_rtx (out);
21083 else
21085 rtx out1;
21086 out1 = copy_rtx (out);
21087 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21088 nops++;
21089 if (diff & 1)
21091 tmp = gen_rtx_PLUS (mode, tmp, out1);
21092 nops++;
21095 if (cf != 0)
21097 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21098 nops++;
21100 if (!rtx_equal_p (tmp, out))
21102 if (nops == 1)
21103 out = force_operand (tmp, copy_rtx (out));
21104 else
21105 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
21107 if (!rtx_equal_p (out, operands[0]))
21108 emit_move_insn (operands[0], copy_rtx (out));
21110 return true;
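/* Illustrative sketch: with the setcc result (0 or 1) in dest, a single
   lea can scale and offset it, e.g. for ct - cf == 5 (hypothetical
   values):

     lea cf(dest, dest, 4), dest        -- dest = dest * 5 + cf

   which is why only diffs of 1, 2, 3, 4, 5, 8 and 9 are accepted.  */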
21114 * General case: Jumpful:
21115 * xorl dest,dest cmpl op1, op2
21116 * cmpl op1, op2 movl ct, dest
21117 * setcc dest jcc 1f
21118 * decl dest movl cf, dest
21119 * andl (cf-ct),dest 1:
21120 * addl ct,dest
21122 * Size 20. Size 14.
21124 * This is reasonably steep, but branch mispredict costs are
21125 * high on modern CPUs, so consider failing only if optimizing
21126 * for space.
21129 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21130 && BRANCH_COST (optimize_insn_for_speed_p (),
21131 false) >= 2)
21133 if (cf == 0)
21135 machine_mode cmp_mode = GET_MODE (op0);
21136 enum rtx_code new_code;
21138 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21140 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21142 /* We may be reversing an unordered compare to a normal compare,
21143 which is not valid in general (we may convert a non-trapping
21144 condition to a trapping one); however, on i386 we currently
21145 emit all comparisons unordered. */
21146 new_code = reverse_condition_maybe_unordered (code);
21148 else
21150 new_code = ix86_reverse_condition (code, cmp_mode);
21151 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21152 compare_code = reverse_condition (compare_code);
21155 if (new_code != UNKNOWN)
21157 cf = ct;
21158 ct = 0;
21159 code = new_code;
21163 if (compare_code != UNKNOWN)
21165 /* notl op1 (if needed)
21166 sarl $31, op1
21167 andl (cf-ct), op1
21168 addl ct, op1
21170 For x < 0 (resp. x <= -1) there will be no notl,
21171 so if possible swap the constants to get rid of the
21172 complement.
21173 True/false will be -1/0 while code below (store flag
21174 followed by decrement) is 0/-1, so the constants need
21175 to be exchanged once more. */
21177 if (compare_code == GE || !cf)
21179 code = reverse_condition (code);
21180 compare_code = LT;
21182 else
21183 std::swap (ct, cf);
21185 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21187 else
21189 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21191 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21192 constm1_rtx,
21193 copy_rtx (out), 1, OPTAB_DIRECT);
21196 out = expand_simple_binop (mode, AND, copy_rtx (out),
21197 gen_int_mode (cf - ct, mode),
21198 copy_rtx (out), 1, OPTAB_DIRECT);
21199 if (ct)
21200 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21201 copy_rtx (out), 1, OPTAB_DIRECT);
21202 if (!rtx_equal_p (out, operands[0]))
21203 emit_move_insn (operands[0], copy_rtx (out));
21205 return true;
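/* Illustrative sketch of the branch-free general case above, in C terms:

     mask = (condition ? 1 : 0) - 1;      -- setcc, then decrement
     dest = (mask & (cf - ct)) + ct;      -- ct when true, cf when false

   trading a few extra straight-line instructions for the branch
   mispredict risk the comment mentions.  */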
21209 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21211 /* Try a few things more with specific constants and a variable. */
21213 optab op;
21214 rtx var, orig_out, out, tmp;
21216 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21217 return false;
21219 /* If one of the two operands is an interesting constant, load a
21220 constant with the above and mask it in with a logical operation. */
21222 if (CONST_INT_P (operands[2]))
21224 var = operands[3];
21225 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21226 operands[3] = constm1_rtx, op = and_optab;
21227 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21228 operands[3] = const0_rtx, op = ior_optab;
21229 else
21230 return false;
21232 else if (CONST_INT_P (operands[3]))
21234 var = operands[2];
21235 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21236 operands[2] = constm1_rtx, op = and_optab;
21237 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21238 operands[2] = const0_rtx, op = ior_optab;
21239 else
21240 return false;
21242 else
21243 return false;
21245 orig_out = operands[0];
21246 tmp = gen_reg_rtx (mode);
21247 operands[0] = tmp;
21249 /* Recurse to get the constant loaded. */
21250 if (!ix86_expand_int_movcc (operands))
21251 return false;
21253 /* Mask in the interesting variable. */
21254 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21255 OPTAB_WIDEN);
21256 if (!rtx_equal_p (out, orig_out))
21257 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21259 return true;
21263 * For comparison with above,
21265 * movl cf,dest
21266 * movl ct,tmp
21267 * cmpl op1,op2
21268 * cmovcc tmp,dest
21270 * Size 15.
21273 if (! nonimmediate_operand (operands[2], mode))
21274 operands[2] = force_reg (mode, operands[2]);
21275 if (! nonimmediate_operand (operands[3], mode))
21276 operands[3] = force_reg (mode, operands[3]);
21278 if (! register_operand (operands[2], VOIDmode)
21279 && (mode == QImode
21280 || ! register_operand (operands[3], VOIDmode)))
21281 operands[2] = force_reg (mode, operands[2]);
21283 if (mode == QImode
21284 && ! register_operand (operands[3], VOIDmode))
21285 operands[3] = force_reg (mode, operands[3]);
21287 emit_insn (compare_seq);
21288 emit_insn (gen_rtx_SET (operands[0],
21289 gen_rtx_IF_THEN_ELSE (mode,
21290 compare_op, operands[2],
21291 operands[3])));
21292 return true;
21295 /* Swap, force into registers, or otherwise massage the two operands
21296 to an sse comparison with a mask result. Thus we differ a bit from
21297 ix86_prepare_fp_compare_args which expects to produce a flags result.
21299 The DEST operand exists to help determine whether to commute commutative
21300 operators. The POP0/POP1 operands are updated in place. The new
21301 comparison code is returned, or UNKNOWN if not implementable. */
21303 static enum rtx_code
21304 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21305 rtx *pop0, rtx *pop1)
21307 switch (code)
21309 case LTGT:
21310 case UNEQ:
21311 /* AVX supports all the needed comparisons. */
21312 if (TARGET_AVX)
21313 break;
21314 /* We have no LTGT as an operator. We could implement it with
21315 NE & ORDERED, but this requires an extra temporary. It's
21316 not clear that it's worth it. */
21317 return UNKNOWN;
21319 case LT:
21320 case LE:
21321 case UNGT:
21322 case UNGE:
21323 /* These are supported directly. */
21324 break;
21326 case EQ:
21327 case NE:
21328 case UNORDERED:
21329 case ORDERED:
21330 /* AVX has 3 operand comparisons, no need to swap anything. */
21331 if (TARGET_AVX)
21332 break;
21333 /* For commutative operators, try to canonicalize the destination
21334 operand to be first in the comparison - this helps reload to
21335 avoid extra moves. */
21336 if (!dest || !rtx_equal_p (dest, *pop1))
21337 break;
21338 /* FALLTHRU */
21340 case GE:
21341 case GT:
21342 case UNLE:
21343 case UNLT:
21344 /* These are not supported directly before AVX, and furthermore
21345 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21346 comparison operands to transform into something that is
21347 supported. */
21348 std::swap (*pop0, *pop1);
21349 code = swap_condition (code);
21350 break;
21352 default:
21353 gcc_unreachable ();
21356 return code;
21359 /* Detect conditional moves that exactly match min/max operational
21360 semantics. Note that this is IEEE safe, as long as we don't
21361 interchange the operands.
21363 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21364 and TRUE if the operation is successful and instructions are emitted. */
21366 static bool
21367 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21368 rtx cmp_op1, rtx if_true, rtx if_false)
21370 machine_mode mode;
21371 bool is_min;
21372 rtx tmp;
21374 if (code == LT)
21376 else if (code == UNGE)
21377 std::swap (if_true, if_false);
21378 else
21379 return false;
21381 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21382 is_min = true;
21383 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21384 is_min = false;
21385 else
21386 return false;
21388 mode = GET_MODE (dest);
21390 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21391 but MODE may be a vector mode and thus not appropriate. */
21392 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21394 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21395 rtvec v;
21397 if_true = force_reg (mode, if_true);
21398 v = gen_rtvec (2, if_true, if_false);
21399 tmp = gen_rtx_UNSPEC (mode, v, u);
21401 else
21403 code = is_min ? SMIN : SMAX;
21404 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21407 emit_insn (gen_rtx_SET (dest, tmp));
21408 return true;
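/* Illustrative reminder (hedged): SSE min/max are not commutative in the
   IEEE sense; minss (x, y) yields x < y ? x : y, so

     a < b ? a : b      ->  min (a, b)
     a < b ? b : a      ->  max (a, b) after swapping the arms

   and NaN or signed-zero ties always take the second operand.  Hence the
   UNSPEC_IEEE_MIN/MAX path unless -ffinite-math-only and
   -funsafe-math-optimizations both hold.  */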
21411 /* Expand an sse vector comparison. Return the register with the result. */
21413 static rtx
21414 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21415 rtx op_true, rtx op_false)
21417 machine_mode mode = GET_MODE (dest);
21418 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21420 /* In the general case the result of the comparison can differ from the operands' type. */
21421 machine_mode cmp_mode;
21423 /* In AVX512F the result of comparison is an integer mask. */
21424 bool maskcmp = false;
21425 rtx x;
21427 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21429 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21430 gcc_assert (cmp_mode != BLKmode);
21432 maskcmp = true;
21434 else
21435 cmp_mode = cmp_ops_mode;
21438 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21439 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21440 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21442 if (optimize
21443 || reg_overlap_mentioned_p (dest, op_true)
21444 || reg_overlap_mentioned_p (dest, op_false))
21445 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21447 /* Compare patterns for int modes are unspec in AVX512F only. */
21448 if (maskcmp && (code == GT || code == EQ))
21450 rtx (*gen)(rtx, rtx, rtx);
21452 switch (cmp_ops_mode)
21454 case V64QImode:
21455 gcc_assert (TARGET_AVX512BW);
21456 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21457 break;
21458 case V32HImode:
21459 gcc_assert (TARGET_AVX512BW);
21460 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21461 break;
21462 case V16SImode:
21463 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21464 break;
21465 case V8DImode:
21466 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21467 break;
21468 default:
21469 gen = NULL;
21472 if (gen)
21474 emit_insn (gen (dest, cmp_op0, cmp_op1));
21475 return dest;
21478 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21480 if (cmp_mode != mode && !maskcmp)
21482 x = force_reg (cmp_ops_mode, x);
21483 convert_move (dest, x, false);
21485 else
21486 emit_insn (gen_rtx_SET (dest, x));
21488 return dest;
21491 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21492 operations. This is used for both scalar and vector conditional moves. */
21494 static void
21495 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21497 machine_mode mode = GET_MODE (dest);
21498 machine_mode cmpmode = GET_MODE (cmp);
21500 /* In AVX512F the result of comparison is an integer mask. */
21501 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21503 rtx t2, t3, x;
21505 if (vector_all_ones_operand (op_true, mode)
21506 && rtx_equal_p (op_false, CONST0_RTX (mode))
21507 && !maskcmp)
21509 emit_insn (gen_rtx_SET (dest, cmp));
21511 else if (op_false == CONST0_RTX (mode)
21512 && !maskcmp)
21514 op_true = force_reg (mode, op_true);
21515 x = gen_rtx_AND (mode, cmp, op_true);
21516 emit_insn (gen_rtx_SET (dest, x));
21518 else if (op_true == CONST0_RTX (mode)
21519 && !maskcmp)
21521 op_false = force_reg (mode, op_false);
21522 x = gen_rtx_NOT (mode, cmp);
21523 x = gen_rtx_AND (mode, x, op_false);
21524 emit_insn (gen_rtx_SET (dest, x));
21526 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21527 && !maskcmp)
21529 op_false = force_reg (mode, op_false);
21530 x = gen_rtx_IOR (mode, cmp, op_false);
21531 emit_insn (gen_rtx_SET (dest, x));
21533 else if (TARGET_XOP
21534 && !maskcmp)
21536 op_true = force_reg (mode, op_true);
21538 if (!nonimmediate_operand (op_false, mode))
21539 op_false = force_reg (mode, op_false);
21541 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
21542 op_true,
21543 op_false)));
21545 else
21547 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21548 rtx d = dest;
21550 if (!nonimmediate_operand (op_true, mode))
21551 op_true = force_reg (mode, op_true);
21553 op_false = force_reg (mode, op_false);
21555 switch (mode)
21557 case V4SFmode:
21558 if (TARGET_SSE4_1)
21559 gen = gen_sse4_1_blendvps;
21560 break;
21561 case V2DFmode:
21562 if (TARGET_SSE4_1)
21563 gen = gen_sse4_1_blendvpd;
21564 break;
21565 case V16QImode:
21566 case V8HImode:
21567 case V4SImode:
21568 case V2DImode:
21569 if (TARGET_SSE4_1)
21571 gen = gen_sse4_1_pblendvb;
21572 if (mode != V16QImode)
21573 d = gen_reg_rtx (V16QImode);
21574 op_false = gen_lowpart (V16QImode, op_false);
21575 op_true = gen_lowpart (V16QImode, op_true);
21576 cmp = gen_lowpart (V16QImode, cmp);
21578 break;
21579 case V8SFmode:
21580 if (TARGET_AVX)
21581 gen = gen_avx_blendvps256;
21582 break;
21583 case V4DFmode:
21584 if (TARGET_AVX)
21585 gen = gen_avx_blendvpd256;
21586 break;
21587 case V32QImode:
21588 case V16HImode:
21589 case V8SImode:
21590 case V4DImode:
21591 if (TARGET_AVX2)
21593 gen = gen_avx2_pblendvb;
21594 if (mode != V32QImode)
21595 d = gen_reg_rtx (V32QImode);
21596 op_false = gen_lowpart (V32QImode, op_false);
21597 op_true = gen_lowpart (V32QImode, op_true);
21598 cmp = gen_lowpart (V32QImode, cmp);
21600 break;
21602 case V64QImode:
21603 gen = gen_avx512bw_blendmv64qi;
21604 break;
21605 case V32HImode:
21606 gen = gen_avx512bw_blendmv32hi;
21607 break;
21608 case V16SImode:
21609 gen = gen_avx512f_blendmv16si;
21610 break;
21611 case V8DImode:
21612 gen = gen_avx512f_blendmv8di;
21613 break;
21614 case V8DFmode:
21615 gen = gen_avx512f_blendmv8df;
21616 break;
21617 case V16SFmode:
21618 gen = gen_avx512f_blendmv16sf;
21619 break;
21621 default:
21622 break;
21625 if (gen != NULL)
21627 emit_insn (gen (d, op_false, op_true, cmp));
21628 if (d != dest)
21629 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21631 else
21633 op_true = force_reg (mode, op_true);
21635 t2 = gen_reg_rtx (mode);
21636 if (optimize)
21637 t3 = gen_reg_rtx (mode);
21638 else
21639 t3 = dest;
21641 x = gen_rtx_AND (mode, op_true, cmp);
21642 emit_insn (gen_rtx_SET (t2, x));
21644 x = gen_rtx_NOT (mode, cmp);
21645 x = gen_rtx_AND (mode, x, op_false);
21646 emit_insn (gen_rtx_SET (t3, x));
21648 x = gen_rtx_IOR (mode, t3, t2);
21649 emit_insn (gen_rtx_SET (dest, x));
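/* Illustrative sketch: this fallback is the classic three-instruction
   mask select,

     t2   = cmp  & op_true;
     t3   = ~cmp & op_false;
     dest = t2 | t3;

   where each lane of cmp is all-ones or all-zeros, so every lane of dest
   copies the matching lane of op_true or op_false.  */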
21654 /* Expand a floating-point conditional move. Return true if successful. */
21656 bool
21657 ix86_expand_fp_movcc (rtx operands[])
21659 machine_mode mode = GET_MODE (operands[0]);
21660 enum rtx_code code = GET_CODE (operands[1]);
21661 rtx tmp, compare_op;
21662 rtx op0 = XEXP (operands[1], 0);
21663 rtx op1 = XEXP (operands[1], 1);
21665 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21667 machine_mode cmode;
21669 /* Since we have no cmove for SSE registers, don't force bad register
21670 allocation just to gain access to it. Deny movcc when the
21671 comparison mode doesn't match the move mode. */
21672 cmode = GET_MODE (op0);
21673 if (cmode == VOIDmode)
21674 cmode = GET_MODE (op1);
21675 if (cmode != mode)
21676 return false;
21678 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21679 if (code == UNKNOWN)
21680 return false;
21682 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21683 operands[2], operands[3]))
21684 return true;
21686 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21687 operands[2], operands[3]);
21688 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21689 return true;
21692 if (GET_MODE (op0) == TImode
21693 || (GET_MODE (op0) == DImode
21694 && !TARGET_64BIT))
21695 return false;
21697 /* The floating point conditional move instructions don't directly
21698 support conditions resulting from a signed integer comparison. */
21700 compare_op = ix86_expand_compare (code, op0, op1);
21701 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21703 tmp = gen_reg_rtx (QImode);
21704 ix86_expand_setcc (tmp, code, op0, op1);
21706 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21709 emit_insn (gen_rtx_SET (operands[0],
21710 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21711 operands[2], operands[3])));
21713 return true;
21716 /* Expand a floating-point vector conditional move; a vcond operation
21717 rather than a movcc operation. */
21719 bool
21720 ix86_expand_fp_vcond (rtx operands[])
21722 enum rtx_code code = GET_CODE (operands[3]);
21723 rtx cmp;
21725 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21726 &operands[4], &operands[5]);
21727 if (code == UNKNOWN)
21729 rtx temp;
21730 switch (GET_CODE (operands[3]))
21732 case LTGT:
21733 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21734 operands[5], operands[0], operands[0]);
21735 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21736 operands[5], operands[1], operands[2]);
21737 code = AND;
21738 break;
21739 case UNEQ:
21740 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21741 operands[5], operands[0], operands[0]);
21742 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21743 operands[5], operands[1], operands[2]);
21744 code = IOR;
21745 break;
21746 default:
21747 gcc_unreachable ();
21749 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21750 OPTAB_DIRECT);
21751 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21752 return true;
21755 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21756 operands[5], operands[1], operands[2]))
21757 return true;
21759 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21760 operands[1], operands[2]);
21761 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21762 return true;
21765 /* Expand a signed/unsigned integral vector conditional move. */
21767 bool
21768 ix86_expand_int_vcond (rtx operands[])
21770 machine_mode data_mode = GET_MODE (operands[0]);
21771 machine_mode mode = GET_MODE (operands[4]);
21772 enum rtx_code code = GET_CODE (operands[3]);
21773 bool negate = false;
21774 rtx x, cop0, cop1;
21776 cop0 = operands[4];
21777 cop1 = operands[5];
21779 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21780 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21781 if ((code == LT || code == GE)
21782 && data_mode == mode
21783 && cop1 == CONST0_RTX (mode)
21784 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21785 && GET_MODE_UNIT_SIZE (data_mode) > 1
21786 && GET_MODE_UNIT_SIZE (data_mode) <= 8
21787 && (GET_MODE_SIZE (data_mode) == 16
21788 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21790 rtx negop = operands[2 - (code == LT)];
21791 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
21792 if (negop == CONST1_RTX (data_mode))
21794 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21795 operands[0], 1, OPTAB_DIRECT);
21796 if (res != operands[0])
21797 emit_move_insn (operands[0], res);
21798 return true;
21800 else if (GET_MODE_INNER (data_mode) != DImode
21801 && vector_all_ones_operand (negop, data_mode))
21803 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21804 operands[0], 0, OPTAB_DIRECT);
21805 if (res != operands[0])
21806 emit_move_insn (operands[0], res);
21807 return true;
21811 if (!nonimmediate_operand (cop1, mode))
21812 cop1 = force_reg (mode, cop1);
21813 if (!general_operand (operands[1], data_mode))
21814 operands[1] = force_reg (data_mode, operands[1]);
21815 if (!general_operand (operands[2], data_mode))
21816 operands[2] = force_reg (data_mode, operands[2]);
21818 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21819 if (TARGET_XOP
21820 && (mode == V16QImode || mode == V8HImode
21821 || mode == V4SImode || mode == V2DImode))
21823 else
21825 /* Canonicalize the comparison to EQ, GT, GTU. */
21826 switch (code)
21828 case EQ:
21829 case GT:
21830 case GTU:
21831 break;
21833 case NE:
21834 case LE:
21835 case LEU:
21836 code = reverse_condition (code);
21837 negate = true;
21838 break;
21840 case GE:
21841 case GEU:
21842 code = reverse_condition (code);
21843 negate = true;
21844 /* FALLTHRU */
21846 case LT:
21847 case LTU:
21848 std::swap (cop0, cop1);
21849 code = swap_condition (code);
21850 break;
21852 default:
21853 gcc_unreachable ();
21856 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21857 if (mode == V2DImode)
21859 switch (code)
21861 case EQ:
21862 /* SSE4.1 supports EQ. */
21863 if (!TARGET_SSE4_1)
21864 return false;
21865 break;
21867 case GT:
21868 case GTU:
21869 /* SSE4.2 supports GT/GTU. */
21870 if (!TARGET_SSE4_2)
21871 return false;
21872 break;
21874 default:
21875 gcc_unreachable ();
21879 /* Unsigned parallel compare is not supported by the hardware.
21880 Play some tricks to turn this into a signed comparison, or an
21881 equality comparison against 0. */
21882 if (code == GTU)
21884 cop0 = force_reg (mode, cop0);
21886 switch (mode)
21888 case V16SImode:
21889 case V8DImode:
21890 case V8SImode:
21891 case V4DImode:
21892 case V4SImode:
21893 case V2DImode:
21895 rtx t1, t2, mask;
21896 rtx (*gen_sub3) (rtx, rtx, rtx);
21898 switch (mode)
21900 case V16SImode: gen_sub3 = gen_subv16si3; break;
21901 case V8DImode: gen_sub3 = gen_subv8di3; break;
21902 case V8SImode: gen_sub3 = gen_subv8si3; break;
21903 case V4DImode: gen_sub3 = gen_subv4di3; break;
21904 case V4SImode: gen_sub3 = gen_subv4si3; break;
21905 case V2DImode: gen_sub3 = gen_subv2di3; break;
21906 default:
21907 gcc_unreachable ();
21909 /* Subtract (-INT_MAX - 1), i.e. flip the sign bit of both
21910 operands, to make them signed. */
21911 mask = ix86_build_signbit_mask (mode, true, false);
21912 t1 = gen_reg_rtx (mode);
21913 emit_insn (gen_sub3 (t1, cop0, mask));
21915 t2 = gen_reg_rtx (mode);
21916 emit_insn (gen_sub3 (t2, cop1, mask));
21918 cop0 = t1;
21919 cop1 = t2;
21920 code = GT;
21922 break;
21924 case V64QImode:
21925 case V32HImode:
21926 case V32QImode:
21927 case V16HImode:
21928 case V16QImode:
21929 case V8HImode:
21930 /* Perform a parallel unsigned saturating subtraction. */
21931 x = gen_reg_rtx (mode);
21932 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
21934 cop0 = x;
21935 cop1 = CONST0_RTX (mode);
21936 code = EQ;
21937 negate = !negate;
21938 break;
21940 default:
21941 gcc_unreachable ();
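/* Illustrative scalar picture of the two tricks above (hypothetical
   helper name sat_sub_u, unsigned saturating subtraction):

     a >u b                  <==>  (int)(a ^ 0x80000000) > (int)(b ^ 0x80000000)
     a >u b  (bytes/words)   <==>  sat_sub_u (a, b) != 0

   Flipping the sign bit (equivalently, subtracting the signbit mask)
   turns an unsigned order into a signed one; saturating subtraction is
   nonzero exactly when a > b.  */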
21946 /* Allow the comparison to be done in one mode, but the movcc to
21947 happen in another mode. */
21948 if (data_mode == mode)
21950 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21951 operands[1+negate], operands[2-negate]);
21953 else
21955 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21956 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21957 operands[1+negate], operands[2-negate]);
21958 if (GET_MODE (x) == mode)
21959 x = gen_lowpart (data_mode, x);
21962 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21963 operands[2-negate]);
21964 return true;
21967 /* AVX512F does support 64-byte integer vector operations,
21968 thus the longest vector we are faced with is V64QImode. */
21969 #define MAX_VECT_LEN 64
21971 struct expand_vec_perm_d
21973 rtx target, op0, op1;
21974 unsigned char perm[MAX_VECT_LEN];
21975 machine_mode vmode;
21976 unsigned char nelt;
21977 bool one_operand_p;
21978 bool testing_p;
21981 static bool
21982 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21983 struct expand_vec_perm_d *d)
21985 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21986 expander, so args are either in d, or in op0, op1 etc. */
21987 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21988 machine_mode maskmode = mode;
21989 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21991 switch (mode)
21993 case V8HImode:
21994 if (TARGET_AVX512VL && TARGET_AVX512BW)
21995 gen = gen_avx512vl_vpermi2varv8hi3;
21996 break;
21997 case V16HImode:
21998 if (TARGET_AVX512VL && TARGET_AVX512BW)
21999 gen = gen_avx512vl_vpermi2varv16hi3;
22000 break;
22001 case V64QImode:
22002 if (TARGET_AVX512VBMI)
22003 gen = gen_avx512bw_vpermi2varv64qi3;
22004 break;
22005 case V32HImode:
22006 if (TARGET_AVX512BW)
22007 gen = gen_avx512bw_vpermi2varv32hi3;
22008 break;
22009 case V4SImode:
22010 if (TARGET_AVX512VL)
22011 gen = gen_avx512vl_vpermi2varv4si3;
22012 break;
22013 case V8SImode:
22014 if (TARGET_AVX512VL)
22015 gen = gen_avx512vl_vpermi2varv8si3;
22016 break;
22017 case V16SImode:
22018 if (TARGET_AVX512F)
22019 gen = gen_avx512f_vpermi2varv16si3;
22020 break;
22021 case V4SFmode:
22022 if (TARGET_AVX512VL)
22024 gen = gen_avx512vl_vpermi2varv4sf3;
22025 maskmode = V4SImode;
22027 break;
22028 case V8SFmode:
22029 if (TARGET_AVX512VL)
22031 gen = gen_avx512vl_vpermi2varv8sf3;
22032 maskmode = V8SImode;
22034 break;
22035 case V16SFmode:
22036 if (TARGET_AVX512F)
22038 gen = gen_avx512f_vpermi2varv16sf3;
22039 maskmode = V16SImode;
22041 break;
22042 case V2DImode:
22043 if (TARGET_AVX512VL)
22044 gen = gen_avx512vl_vpermi2varv2di3;
22045 break;
22046 case V4DImode:
22047 if (TARGET_AVX512VL)
22048 gen = gen_avx512vl_vpermi2varv4di3;
22049 break;
22050 case V8DImode:
22051 if (TARGET_AVX512F)
22052 gen = gen_avx512f_vpermi2varv8di3;
22053 break;
22054 case V2DFmode:
22055 if (TARGET_AVX512VL)
22057 gen = gen_avx512vl_vpermi2varv2df3;
22058 maskmode = V2DImode;
22060 break;
22061 case V4DFmode:
22062 if (TARGET_AVX512VL)
22064 gen = gen_avx512vl_vpermi2varv4df3;
22065 maskmode = V4DImode;
22067 break;
22068 case V8DFmode:
22069 if (TARGET_AVX512F)
22071 gen = gen_avx512f_vpermi2varv8df3;
22072 maskmode = V8DImode;
22074 break;
22075 default:
22076 break;
22079 if (gen == NULL)
22080 return false;
22082 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22083 expander, so args are either in d, or in op0, op1 etc. */
22084 if (d)
22086 rtx vec[64];
22087 target = d->target;
22088 op0 = d->op0;
22089 op1 = d->op1;
22090 for (int i = 0; i < d->nelt; ++i)
22091 vec[i] = GEN_INT (d->perm[i]);
22092 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22095 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22096 return true;
22099 /* Expand a variable vector permutation. */
22101 void
22102 ix86_expand_vec_perm (rtx operands[])
22104 rtx target = operands[0];
22105 rtx op0 = operands[1];
22106 rtx op1 = operands[2];
22107 rtx mask = operands[3];
22108 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22109 machine_mode mode = GET_MODE (op0);
22110 machine_mode maskmode = GET_MODE (mask);
22111 int w, e, i;
22112 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22114 /* Number of elements in the vector. */
22115 w = GET_MODE_NUNITS (mode);
22116 e = GET_MODE_UNIT_SIZE (mode);
22117 gcc_assert (w <= 64);
22119 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22120 return;
22122 if (TARGET_AVX2)
22124 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22126 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22127 a constant shuffle operand. With a tiny bit of effort we can
22128 use VPERMD instead. A re-interpretation stall for V4DFmode is
22129 unfortunate but there's no avoiding it.
22130 Similarly for V16HImode we don't have instructions for variable
22131 shuffling, while for V32QImode, after preparing suitable masks,
22132 we can use vpshufb; vpshufb; vpermq; vpor. */
22134 if (mode == V16HImode)
22136 maskmode = mode = V32QImode;
22137 w = 32;
22138 e = 1;
22140 else
22142 maskmode = mode = V8SImode;
22143 w = 8;
22144 e = 4;
22146 t1 = gen_reg_rtx (maskmode);
22148 /* Replicate the low bits of the V4DImode mask into V8SImode:
22149 mask = { A B C D }
22150 t1 = { A A B B C C D D }. */
22151 for (i = 0; i < w / 2; ++i)
22152 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22153 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22154 vt = force_reg (maskmode, vt);
22155 mask = gen_lowpart (maskmode, mask);
22156 if (maskmode == V8SImode)
22157 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22158 else
22159 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22161 /* Multiply the shuffle indices by two. */
22162 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22163 OPTAB_DIRECT);
22165 /* Add one to the odd shuffle indices:
22166 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22167 for (i = 0; i < w / 2; ++i)
22169 vec[i * 2] = const0_rtx;
22170 vec[i * 2 + 1] = const1_rtx;
22172 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22173 vt = validize_mem (force_const_mem (maskmode, vt));
22174 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22175 OPTAB_DIRECT);
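/* Illustrative worked example (hypothetical mask): a V4DI control
   { 3 0 2 1 } becomes { 3 3 0 0 2 2 1 1 } after replication, then
   { 6 6 0 0 4 4 2 2 } after doubling, and finally

     { 6 7  0 1  4 5  2 3 }

   after the odd-position increment, selecting the same 64-bit elements
   as pairs of 32-bit elements for VPERMD.  */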
22177 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22178 operands[3] = mask = t1;
22179 target = gen_reg_rtx (mode);
22180 op0 = gen_lowpart (mode, op0);
22181 op1 = gen_lowpart (mode, op1);
22184 switch (mode)
22186 case V8SImode:
22187 /* The VPERMD and VPERMPS instructions already properly ignore
22188 the high bits of the shuffle elements. No need for us to
22189 perform an AND ourselves. */
22190 if (one_operand_shuffle)
22192 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22193 if (target != operands[0])
22194 emit_move_insn (operands[0],
22195 gen_lowpart (GET_MODE (operands[0]), target));
22197 else
22199 t1 = gen_reg_rtx (V8SImode);
22200 t2 = gen_reg_rtx (V8SImode);
22201 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22202 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22203 goto merge_two;
22205 return;
22207 case V8SFmode:
22208 mask = gen_lowpart (V8SImode, mask);
22209 if (one_operand_shuffle)
22210 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22211 else
22213 t1 = gen_reg_rtx (V8SFmode);
22214 t2 = gen_reg_rtx (V8SFmode);
22215 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22216 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22217 goto merge_two;
22219 return;
22221 case V4SImode:
22222 /* By combining the two 128-bit input vectors into one 256-bit
22223 input vector, we can use VPERMD and VPERMPS for the full
22224 two-operand shuffle. */
22225 t1 = gen_reg_rtx (V8SImode);
22226 t2 = gen_reg_rtx (V8SImode);
22227 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22228 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22229 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22230 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22231 return;
22233 case V4SFmode:
22234 t1 = gen_reg_rtx (V8SFmode);
22235 t2 = gen_reg_rtx (V8SImode);
22236 mask = gen_lowpart (V4SImode, mask);
22237 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22238 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22239 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22240 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22241 return;
22243 case V32QImode:
22244 t1 = gen_reg_rtx (V32QImode);
22245 t2 = gen_reg_rtx (V32QImode);
22246 t3 = gen_reg_rtx (V32QImode);
22247 vt2 = GEN_INT (-128);
22248 for (i = 0; i < 32; i++)
22249 vec[i] = vt2;
22250 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22251 vt = force_reg (V32QImode, vt);
22252 for (i = 0; i < 32; i++)
22253 vec[i] = i < 16 ? vt2 : const0_rtx;
22254 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22255 vt2 = force_reg (V32QImode, vt2);
22256 /* From mask create two adjusted masks, which contain the same
22257 bits as mask in the low 7 bits of each vector element.
22258 The first mask will have the most significant bit clear
22259 if it requests element from the same 128-bit lane
22260 and MSB set if it requests element from the other 128-bit lane.
22261 The second mask will have the opposite values of the MSB,
22262 and additionally will have its 128-bit lanes swapped.
22263 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22264 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22265 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22266 stands for other 12 bytes. */
22267 /* The bit that says whether an element is from the same lane or the
22268 other lane is bit 4, so shift it up by 3 to the MSB position. */
22269 t5 = gen_reg_rtx (V4DImode);
22270 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22271 GEN_INT (3)));
22272 /* Clear MSB bits from the mask just in case it had them set. */
22273 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22274 /* After this t1 will have MSB set for elements from other lane. */
22275 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22276 /* Clear bits other than MSB. */
22277 emit_insn (gen_andv32qi3 (t1, t1, vt));
22278 /* Or in the lower bits from mask into t3. */
22279 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22280 /* And invert MSB bits in t1, so MSB is set for elements from the same
22281 lane. */
22282 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22283 /* Swap 128-bit lanes in t3. */
22284 t6 = gen_reg_rtx (V4DImode);
22285 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22286 const2_rtx, GEN_INT (3),
22287 const0_rtx, const1_rtx));
22288 /* And or in the lower bits from mask into t1. */
22289 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22290 if (one_operand_shuffle)
22292 /* Each of these shuffles will put 0s in places where
22293 element from the other 128-bit lane is needed, otherwise
22294 will shuffle in the requested value. */
22295 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22296 gen_lowpart (V32QImode, t6)));
22297 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22298 /* For t3 the 128-bit lanes are swapped again. */
22299 t7 = gen_reg_rtx (V4DImode);
22300 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22301 const2_rtx, GEN_INT (3),
22302 const0_rtx, const1_rtx));
22303 /* And oring both together leads to the result. */
22304 emit_insn (gen_iorv32qi3 (target, t1,
22305 gen_lowpart (V32QImode, t7)));
22306 if (target != operands[0])
22307 emit_move_insn (operands[0],
22308 gen_lowpart (GET_MODE (operands[0]), target));
22309 return;
22312 t4 = gen_reg_rtx (V32QImode);
22313 /* Similar to the one_operand_shuffle code above, just
22314 repeated twice, once for each operand. The merge_two:
22315 code will merge the two results together. */
22316 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22317 gen_lowpart (V32QImode, t6)));
22318 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22319 gen_lowpart (V32QImode, t6)));
22320 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22321 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22322 t7 = gen_reg_rtx (V4DImode);
22323 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22324 const2_rtx, GEN_INT (3),
22325 const0_rtx, const1_rtx));
22326 t8 = gen_reg_rtx (V4DImode);
22327 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22328 const2_rtx, GEN_INT (3),
22329 const0_rtx, const1_rtx));
22330 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22331 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22332 t1 = t4;
22333 t2 = t3;
22334 goto merge_two;
22336 default:
22337 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22338 break;
22342 if (TARGET_XOP)
22344 /* The XOP VPPERM insn supports three inputs. By ignoring the
22345 one_operand_shuffle special case, we avoid creating another
22346 set of constant vectors in memory. */
22347 one_operand_shuffle = false;
22349 /* mask = mask & {2*w-1, ...} */
22350 vt = GEN_INT (2*w - 1);
22352 else
22354 /* mask = mask & {w-1, ...} */
22355 vt = GEN_INT (w - 1);
22358 for (i = 0; i < w; i++)
22359 vec[i] = vt;
22360 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22361 mask = expand_simple_binop (maskmode, AND, mask, vt,
22362 NULL_RTX, 0, OPTAB_DIRECT);
22364 /* For non-QImode operations, convert the word permutation control
22365 into a byte permutation control. */
22366 if (mode != V16QImode)
22368 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22369 GEN_INT (exact_log2 (e)),
22370 NULL_RTX, 0, OPTAB_DIRECT);
22372 /* Convert mask to vector of chars. */
22373 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22375 /* Replicate each of the input bytes into byte positions:
22376 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22377 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22378 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22379 for (i = 0; i < 16; ++i)
22380 vec[i] = GEN_INT (i/e * e);
22381 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22382 vt = validize_mem (force_const_mem (V16QImode, vt));
22383 if (TARGET_XOP)
22384 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22385 else
22386 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22388 /* Convert it into the byte positions by doing
22389 mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...} where e = 16/w. */
22390 for (i = 0; i < 16; ++i)
22391 vec[i] = GEN_INT (i % e);
22392 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22393 vt = validize_mem (force_const_mem (V16QImode, vt));
22394 emit_insn (gen_addv16qi3 (mask, mask, vt));
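/* Illustrative example (hypothetical control): for V4SImode, e == 4, the
   word control { 2 0 3 1 } is first scaled to { 8 0 12 4 }, replicated
   per byte to { 8 8 8 8  0 0 0 0  12 12 12 12  4 4 4 4 }, and offset to

     { 8 9 10 11  0 1 2 3  12 13 14 15  4 5 6 7 }

   which pshufb can apply directly to the 16 input bytes.  */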
22397 /* The actual shuffle operations all operate on V16QImode. */
22398 op0 = gen_lowpart (V16QImode, op0);
22399 op1 = gen_lowpart (V16QImode, op1);
22401 if (TARGET_XOP)
22403 if (GET_MODE (target) != V16QImode)
22404 target = gen_reg_rtx (V16QImode);
22405 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22406 if (target != operands[0])
22407 emit_move_insn (operands[0],
22408 gen_lowpart (GET_MODE (operands[0]), target));
22410 else if (one_operand_shuffle)
22412 if (GET_MODE (target) != V16QImode)
22413 target = gen_reg_rtx (V16QImode);
22414 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22415 if (target != operands[0])
22416 emit_move_insn (operands[0],
22417 gen_lowpart (GET_MODE (operands[0]), target));
22419 else
22421 rtx xops[6];
22422 bool ok;
22424 /* Shuffle the two input vectors independently. */
22425 t1 = gen_reg_rtx (V16QImode);
22426 t2 = gen_reg_rtx (V16QImode);
22427 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22428 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22430 merge_two:
22431 /* Then merge them together. The key is whether any given control
22432 element contained a bit set that indicates the second word. */
22433 mask = operands[3];
22434 vt = GEN_INT (w);
22435 if (maskmode == V2DImode && !TARGET_SSE4_1)
22437 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22438 more shuffle to convert the V2DI input mask into a V4SI
22439 input mask. At that point the masking that expand_int_vcond
22440 performs will work as desired. */
22441 rtx t3 = gen_reg_rtx (V4SImode);
22442 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22443 const0_rtx, const0_rtx,
22444 const2_rtx, const2_rtx));
22445 mask = t3;
22446 maskmode = V4SImode;
22447 e = w = 4;
22450 for (i = 0; i < w; i++)
22451 vec[i] = vt;
22452 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22453 vt = force_reg (maskmode, vt);
22454 mask = expand_simple_binop (maskmode, AND, mask, vt,
22455 NULL_RTX, 0, OPTAB_DIRECT);
22457 if (GET_MODE (target) != mode)
22458 target = gen_reg_rtx (mode);
22459 xops[0] = target;
22460 xops[1] = gen_lowpart (mode, t2);
22461 xops[2] = gen_lowpart (mode, t1);
22462 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22463 xops[4] = mask;
22464 xops[5] = vt;
22465 ok = ix86_expand_int_vcond (xops);
22466 gcc_assert (ok);
22467 if (target != operands[0])
22468 emit_move_insn (operands[0],
22469 gen_lowpart (GET_MODE (operands[0]), target));
22473 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
22474 true if we should do zero extension, else sign extension. HIGH_P is
22475 true if we want the N/2 high elements, else the low elements. */
22477 void
22478 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22480 machine_mode imode = GET_MODE (src);
22481 rtx tmp;
22483 if (TARGET_SSE4_1)
22485 rtx (*unpack)(rtx, rtx);
22486 rtx (*extract)(rtx, rtx) = NULL;
22487 machine_mode halfmode = BLKmode;
22489 switch (imode)
22491 case V64QImode:
22492 if (unsigned_p)
22493 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22494 else
22495 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22496 halfmode = V32QImode;
22497 extract
22498 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22499 break;
22500 case V32QImode:
22501 if (unsigned_p)
22502 unpack = gen_avx2_zero_extendv16qiv16hi2;
22503 else
22504 unpack = gen_avx2_sign_extendv16qiv16hi2;
22505 halfmode = V16QImode;
22506 extract
22507 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22508 break;
22509 case V32HImode:
22510 if (unsigned_p)
22511 unpack = gen_avx512f_zero_extendv16hiv16si2;
22512 else
22513 unpack = gen_avx512f_sign_extendv16hiv16si2;
22514 halfmode = V16HImode;
22515 extract
22516 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22517 break;
22518 case V16HImode:
22519 if (unsigned_p)
22520 unpack = gen_avx2_zero_extendv8hiv8si2;
22521 else
22522 unpack = gen_avx2_sign_extendv8hiv8si2;
22523 halfmode = V8HImode;
22524 extract
22525 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22526 break;
22527 case V16SImode:
22528 if (unsigned_p)
22529 unpack = gen_avx512f_zero_extendv8siv8di2;
22530 else
22531 unpack = gen_avx512f_sign_extendv8siv8di2;
22532 halfmode = V8SImode;
22533 extract
22534 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22535 break;
22536 case V8SImode:
22537 if (unsigned_p)
22538 unpack = gen_avx2_zero_extendv4siv4di2;
22539 else
22540 unpack = gen_avx2_sign_extendv4siv4di2;
22541 halfmode = V4SImode;
22542 extract
22543 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22544 break;
22545 case V16QImode:
22546 if (unsigned_p)
22547 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22548 else
22549 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22550 break;
22551 case V8HImode:
22552 if (unsigned_p)
22553 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22554 else
22555 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22556 break;
22557 case V4SImode:
22558 if (unsigned_p)
22559 unpack = gen_sse4_1_zero_extendv2siv2di2;
22560 else
22561 unpack = gen_sse4_1_sign_extendv2siv2di2;
22562 break;
22563 default:
22564 gcc_unreachable ();
22567 if (GET_MODE_SIZE (imode) >= 32)
22569 tmp = gen_reg_rtx (halfmode);
22570 emit_insn (extract (tmp, src));
22572 else if (high_p)
22574 /* Shift higher 8 bytes to lower 8 bytes. */
22575 tmp = gen_reg_rtx (V1TImode);
22576 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22577 GEN_INT (64)));
22578 tmp = gen_lowpart (imode, tmp);
22580 else
22581 tmp = src;
22583 emit_insn (unpack (dest, tmp));
22585 else
22587 rtx (*unpack)(rtx, rtx, rtx);
22589 switch (imode)
22591 case V16QImode:
22592 if (high_p)
22593 unpack = gen_vec_interleave_highv16qi;
22594 else
22595 unpack = gen_vec_interleave_lowv16qi;
22596 break;
22597 case V8HImode:
22598 if (high_p)
22599 unpack = gen_vec_interleave_highv8hi;
22600 else
22601 unpack = gen_vec_interleave_lowv8hi;
22602 break;
22603 case V4SImode:
22604 if (high_p)
22605 unpack = gen_vec_interleave_highv4si;
22606 else
22607 unpack = gen_vec_interleave_lowv4si;
22608 break;
22609 default:
22610 gcc_unreachable ();
22613 if (unsigned_p)
22614 tmp = force_reg (imode, CONST0_RTX (imode));
22615 else
22616 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22617 src, pc_rtx, pc_rtx);
22619 rtx tmp2 = gen_reg_rtx (imode);
22620 emit_insn (unpack (tmp2, src, tmp));
22621 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22625 /* Expand conditional increment or decrement using adc/sbb instructions.
22626 The default case using setcc followed by a conditional move can be
22627 done by generic code. */
22628 bool
22629 ix86_expand_int_addcc (rtx operands[])
22631 enum rtx_code code = GET_CODE (operands[1]);
22632 rtx flags;
22633 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22634 rtx compare_op;
22635 rtx val = const0_rtx;
22636 bool fpcmp = false;
22637 machine_mode mode;
22638 rtx op0 = XEXP (operands[1], 0);
22639 rtx op1 = XEXP (operands[1], 1);
22641 if (operands[3] != const1_rtx
22642 && operands[3] != constm1_rtx)
22643 return false;
22644 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22645 return false;
22646 code = GET_CODE (compare_op);
22648 flags = XEXP (compare_op, 0);
22650 if (GET_MODE (flags) == CCFPmode
22651 || GET_MODE (flags) == CCFPUmode)
22653 fpcmp = true;
22654 code = ix86_fp_compare_code_to_integer (code);
22657 if (code != LTU)
22659 val = constm1_rtx;
22660 if (fpcmp)
22661 PUT_CODE (compare_op,
22662 reverse_condition_maybe_unordered
22663 (GET_CODE (compare_op)));
22664 else
22665 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22668 mode = GET_MODE (operands[0]);
22670 /* Construct either adc or sbb insn. */
22671 if ((code == LTU) == (operands[3] == constm1_rtx))
22673 switch (mode)
22675 case QImode:
22676 insn = gen_subqi3_carry;
22677 break;
22678 case HImode:
22679 insn = gen_subhi3_carry;
22680 break;
22681 case SImode:
22682 insn = gen_subsi3_carry;
22683 break;
22684 case DImode:
22685 insn = gen_subdi3_carry;
22686 break;
22687 default:
22688 gcc_unreachable ();
22691 else
22693 switch (mode)
22695 case QImode:
22696 insn = gen_addqi3_carry;
22697 break;
22698 case HImode:
22699 insn = gen_addhi3_carry;
22700 break;
22701 case SImode:
22702 insn = gen_addsi3_carry;
22703 break;
22704 case DImode:
22705 insn = gen_adddi3_carry;
22706 break;
22707 default:
22708 gcc_unreachable ();
22711 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22713 return true;
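/* Editor's note (sketch, not part of the original file): the expander above
   compiles "x + (a < b)" / "x - (a < b)" style code into a carry-setting
   compare followed by adc or sbb, with no branch and no cmov.  A value-level
   sketch of the two shapes, using illustrative names: */
#if 0
static unsigned int
cond_incr_sketch (unsigned int a, unsigned int b, unsigned int x)
{
  /* cmp sets the carry flag to (a < b); "adc x, 0" adds it in.  */
  return x + (a < b ? 1u : 0u);
}

static unsigned int
cond_decr_sketch (unsigned int a, unsigned int b, unsigned int x)
{
  /* "sbb x, 0" subtracts the borrow instead.  */
  return x - (a < b ? 1u : 0u);
}
#endif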
22717 /* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
22718 but works for floating point parameters and non-offsettable memories.
22719 For pushes, it returns just stack offsets; the values will be saved
22720 in the right order.  At most four parts are generated.  */
22722 static int
22723 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22725 int size;
22727 if (!TARGET_64BIT)
22728 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22729 else
22730 size = (GET_MODE_SIZE (mode) + 4) / 8;
22732 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22733 gcc_assert (size >= 2 && size <= 4);
22735 /* Optimize constant pool reference to immediates.  This is used by fp
22736 moves, which force all constants to memory to allow combining.  */
22737 if (MEM_P (operand) && MEM_READONLY_P (operand))
22739 rtx tmp = maybe_get_pool_constant (operand);
22740 if (tmp)
22741 operand = tmp;
22744 if (MEM_P (operand) && !offsettable_memref_p (operand))
22746 /* The only non-offsettable memories we handle are pushes.  */
22747 int ok = push_operand (operand, VOIDmode);
22749 gcc_assert (ok);
22751 operand = copy_rtx (operand);
22752 PUT_MODE (operand, word_mode);
22753 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22754 return size;
22757 if (GET_CODE (operand) == CONST_VECTOR)
22759 machine_mode imode = int_mode_for_mode (mode);
22760 /* Caution: if we looked through a constant pool memory above,
22761 the operand may actually have a different mode now. That's
22762 ok, since we want to pun this all the way back to an integer. */
22763 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22764 gcc_assert (operand != NULL);
22765 mode = imode;
22768 if (!TARGET_64BIT)
22770 if (mode == DImode)
22771 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22772 else
22774 int i;
22776 if (REG_P (operand))
22778 gcc_assert (reload_completed);
22779 for (i = 0; i < size; i++)
22780 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22782 else if (offsettable_memref_p (operand))
22784 operand = adjust_address (operand, SImode, 0);
22785 parts[0] = operand;
22786 for (i = 1; i < size; i++)
22787 parts[i] = adjust_address (operand, SImode, 4 * i);
22789 else if (CONST_DOUBLE_P (operand))
22791 REAL_VALUE_TYPE r;
22792 long l[4];
22794 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22795 switch (mode)
22797 case TFmode:
22798 real_to_target (l, &r, mode);
22799 parts[3] = gen_int_mode (l[3], SImode);
22800 parts[2] = gen_int_mode (l[2], SImode);
22801 break;
22802 case XFmode:
22803 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22804 long double may not be 80-bit. */
22805 real_to_target (l, &r, mode);
22806 parts[2] = gen_int_mode (l[2], SImode);
22807 break;
22808 case DFmode:
22809 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22810 break;
22811 default:
22812 gcc_unreachable ();
22814 parts[1] = gen_int_mode (l[1], SImode);
22815 parts[0] = gen_int_mode (l[0], SImode);
22817 else
22818 gcc_unreachable ();
22821 else
22823 if (mode == TImode)
22824 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22825 if (mode == XFmode || mode == TFmode)
22827 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22828 if (REG_P (operand))
22830 gcc_assert (reload_completed);
22831 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22832 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22834 else if (offsettable_memref_p (operand))
22836 operand = adjust_address (operand, DImode, 0);
22837 parts[0] = operand;
22838 parts[1] = adjust_address (operand, upper_mode, 8);
22840 else if (CONST_DOUBLE_P (operand))
22842 REAL_VALUE_TYPE r;
22843 long l[4];
22845 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22846 real_to_target (l, &r, mode);
22848 /* real_to_target puts 32-bit pieces in each long. */
22849 parts[0] =
22850 gen_int_mode
22851 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22852 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22853 DImode);
22855 if (upper_mode == SImode)
22856 parts[1] = gen_int_mode (l[2], SImode);
22857 else
22858 parts[1] =
22859 gen_int_mode
22860 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22861 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22862 DImode);
22864 else
22865 gcc_unreachable ();
22869 return size;
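/* Editor's note (sketch, not part of the original file): on a 32-bit target
   the routine above splits, e.g., a DImode quantity into SImode words
   (parts[0] = low, parts[1] = high).  A value-level C sketch: */
#if 0
#include <stdint.h>

static void
split_di_sketch (uint64_t v, uint32_t parts[2])
{
  parts[0] = (uint32_t) v;          /* low word  */
  parts[1] = (uint32_t) (v >> 32);  /* high word */
}
#endif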
22872 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22873 All required insns are emitted here.  Operands 2-5 are used to hold
22874 the destination parts in the correct order; operands 6-9 hold the
22875 corresponding source parts.  */
22877 void
22878 ix86_split_long_move (rtx operands[])
22880 rtx part[2][4];
22881 int nparts, i, j;
22882 int push = 0;
22883 int collisions = 0;
22884 machine_mode mode = GET_MODE (operands[0]);
22885 bool collisionparts[4];
22887 /* The DFmode expanders may ask us to move a double.
22888 For a 64-bit target this is a single move.  By hiding that fact
22889 here we simplify the i386.md splitters.  */
22890 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22892 /* Optimize constant pool reference to immediates.  This is used by
22893 fp moves, which force all constants to memory to allow combining.  */
22895 if (MEM_P (operands[1])
22896 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22897 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22898 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22899 if (push_operand (operands[0], VOIDmode))
22901 operands[0] = copy_rtx (operands[0]);
22902 PUT_MODE (operands[0], word_mode);
22904 else
22905 operands[0] = gen_lowpart (DImode, operands[0]);
22906 operands[1] = gen_lowpart (DImode, operands[1]);
22907 emit_move_insn (operands[0], operands[1]);
22908 return;
22911 /* The only non-offsettable memory we handle is a push.  */
22912 if (push_operand (operands[0], VOIDmode))
22913 push = 1;
22914 else
22915 gcc_assert (!MEM_P (operands[0])
22916 || offsettable_memref_p (operands[0]));
22918 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22919 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22921 /* When emitting a push, take care of source operands on the stack.  */
22922 if (push && MEM_P (operands[1])
22923 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22925 rtx src_base = XEXP (part[1][nparts - 1], 0);
22927 /* Compensate for the stack decrement by 4. */
22928 if (!TARGET_64BIT && nparts == 3
22929 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22930 src_base = plus_constant (Pmode, src_base, 4);
22932 /* src_base refers to the stack pointer and is
22933 automatically decremented by each emitted push.  */
22934 for (i = 0; i < nparts; i++)
22935 part[1][i] = change_address (part[1][i],
22936 GET_MODE (part[1][i]), src_base);
22939 /* We need to do the copy in the right order in case an address register
22940 of the source overlaps the destination.  */
22941 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22943 rtx tmp;
22945 for (i = 0; i < nparts; i++)
22947 collisionparts[i]
22948 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22949 if (collisionparts[i])
22950 collisions++;
22953 /* Collision in the middle part can be handled by reordering. */
22954 if (collisions == 1 && nparts == 3 && collisionparts [1])
22956 std::swap (part[0][1], part[0][2]);
22957 std::swap (part[1][1], part[1][2]);
22959 else if (collisions == 1
22960 && nparts == 4
22961 && (collisionparts [1] || collisionparts [2]))
22963 if (collisionparts [1])
22965 std::swap (part[0][1], part[0][2]);
22966 std::swap (part[1][1], part[1][2]);
22968 else
22970 std::swap (part[0][2], part[0][3]);
22971 std::swap (part[1][2], part[1][3]);
22975 /* If there are more collisions, we can't handle them by reordering.
22976 Do an lea into the last part and use only one colliding move.  */
22977 else if (collisions > 1)
22979 rtx base, addr, tls_base = NULL_RTX;
22981 collisions = 1;
22983 base = part[0][nparts - 1];
22985 /* Handle the case when the last part isn't valid for lea.
22986 This happens in 64-bit mode when storing the 12-byte XFmode.  */
22987 if (GET_MODE (base) != Pmode)
22988 base = gen_rtx_REG (Pmode, REGNO (base));
22990 addr = XEXP (part[1][0], 0);
22991 if (TARGET_TLS_DIRECT_SEG_REFS)
22993 struct ix86_address parts;
22994 int ok = ix86_decompose_address (addr, &parts);
22995 gcc_assert (ok);
22996 if (parts.seg == DEFAULT_TLS_SEG_REG)
22998 /* It is not valid to use %gs: or %fs: in an lea,
22999 though, so we need to remove it from the address
23000 used for lea and add it to each individual
23001 memory load instead.  */
23002 addr = copy_rtx (addr);
23003 rtx *x = &addr;
23004 while (GET_CODE (*x) == PLUS)
23006 for (i = 0; i < 2; i++)
23008 rtx u = XEXP (*x, i);
23009 if (GET_CODE (u) == ZERO_EXTEND)
23010 u = XEXP (u, 0);
23011 if (GET_CODE (u) == UNSPEC
23012 && XINT (u, 1) == UNSPEC_TP)
23014 tls_base = XEXP (*x, i);
23015 *x = XEXP (*x, 1 - i);
23016 break;
23019 if (tls_base)
23020 break;
23021 x = &XEXP (*x, 0);
23023 gcc_assert (tls_base);
23026 emit_insn (gen_rtx_SET (base, addr));
23027 if (tls_base)
23028 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
23029 part[1][0] = replace_equiv_address (part[1][0], base);
23030 for (i = 1; i < nparts; i++)
23032 if (tls_base)
23033 base = copy_rtx (base);
23034 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
23035 part[1][i] = replace_equiv_address (part[1][i], tmp);
23040 if (push)
23042 if (!TARGET_64BIT)
23044 if (nparts == 3)
23046 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
23047 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
23048 stack_pointer_rtx, GEN_INT (-4)));
23049 emit_move_insn (part[0][2], part[1][2]);
23051 else if (nparts == 4)
23053 emit_move_insn (part[0][3], part[1][3]);
23054 emit_move_insn (part[0][2], part[1][2]);
23057 else
23059 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
23060 register, that is OK; we will just use the larger counterpart.  We also
23061 retype memory; these cases come from an attempt to avoid a REX prefix
23062 when moving the second half of a TFmode value.  */
23063 if (GET_MODE (part[1][1]) == SImode)
23065 switch (GET_CODE (part[1][1]))
23067 case MEM:
23068 part[1][1] = adjust_address (part[1][1], DImode, 0);
23069 break;
23071 case REG:
23072 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23073 break;
23075 default:
23076 gcc_unreachable ();
23079 if (GET_MODE (part[1][0]) == SImode)
23080 part[1][0] = part[1][1];
23083 emit_move_insn (part[0][1], part[1][1]);
23084 emit_move_insn (part[0][0], part[1][0]);
23085 return;
23088 /* Choose the correct order so as not to overwrite the source before it is copied.  */
23089 if ((REG_P (part[0][0])
23090 && REG_P (part[1][1])
23091 && (REGNO (part[0][0]) == REGNO (part[1][1])
23092 || (nparts == 3
23093 && REGNO (part[0][0]) == REGNO (part[1][2]))
23094 || (nparts == 4
23095 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23096 || (collisions > 0
23097 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23099 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23101 operands[2 + i] = part[0][j];
23102 operands[6 + i] = part[1][j];
23105 else
23107 for (i = 0; i < nparts; i++)
23109 operands[2 + i] = part[0][i];
23110 operands[6 + i] = part[1][i];
23114 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23115 if (optimize_insn_for_size_p ())
23117 for (j = 0; j < nparts - 1; j++)
23118 if (CONST_INT_P (operands[6 + j])
23119 && operands[6 + j] != const0_rtx
23120 && REG_P (operands[2 + j]))
23121 for (i = j; i < nparts - 1; i++)
23122 if (CONST_INT_P (operands[7 + i])
23123 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23124 operands[7 + i] = operands[2 + j];
23127 for (i = 0; i < nparts; i++)
23128 emit_move_insn (operands[2 + i], operands[6 + i]);
23130 return;
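/* Editor's note (sketch, not part of the original file): the ordering logic
   above matters when the register holding the source address is also one of
   the destination words; the parts must then be copied last-to-first so the
   address is still intact for the later reads.  Illustrative names only: */
#if 0
static void
copy_words_reversed_sketch (unsigned int *dst, const unsigned int *src,
			    int nparts)
{
  /* Mirrors the reversed operands[2+i]/operands[6+i] ordering above.  */
  for (int i = nparts - 1; i >= 0; i--)
    dst[i] = src[i];
}
#endif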
23133 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23134 left shift by a constant, either using a single shift or
23135 a sequence of add instructions. */
23137 static void
23138 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23140 rtx (*insn)(rtx, rtx, rtx);
23142 if (count == 1
23143 || (count * ix86_cost->add <= ix86_cost->shift_const
23144 && !optimize_insn_for_size_p ()))
23146 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23147 while (count-- > 0)
23148 emit_insn (insn (operand, operand, operand));
23150 else
23152 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23153 emit_insn (insn (operand, operand, GEN_INT (count)));
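/* Editor's note (sketch, not part of the original file): the helper above
   emits the shift as repeated self-addition when COUNT cheap adds beat one
   shift-by-constant; each add doubles the operand.  Value-level sketch: */
#if 0
static unsigned int
shl_by_adds_sketch (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;          /* one add per shift step; x doubles each time */
  return x;
}
#endif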
23157 void
23158 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23160 rtx (*gen_ashl3)(rtx, rtx, rtx);
23161 rtx (*gen_shld)(rtx, rtx, rtx);
23162 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23164 rtx low[2], high[2];
23165 int count;
23167 if (CONST_INT_P (operands[2]))
23169 split_double_mode (mode, operands, 2, low, high);
23170 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23172 if (count >= half_width)
23174 emit_move_insn (high[0], low[1]);
23175 emit_move_insn (low[0], const0_rtx);
23177 if (count > half_width)
23178 ix86_expand_ashl_const (high[0], count - half_width, mode);
23180 else
23182 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23184 if (!rtx_equal_p (operands[0], operands[1]))
23185 emit_move_insn (operands[0], operands[1]);
23187 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23188 ix86_expand_ashl_const (low[0], count, mode);
23190 return;
23193 split_double_mode (mode, operands, 1, low, high);
23195 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23197 if (operands[1] == const1_rtx)
23199 /* Assuming we've chosen QImode-capable registers, 1 << N
23200 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
23201 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23203 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23205 ix86_expand_clear (low[0]);
23206 ix86_expand_clear (high[0]);
23207 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23209 d = gen_lowpart (QImode, low[0]);
23210 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23211 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23212 emit_insn (gen_rtx_SET (d, s));
23214 d = gen_lowpart (QImode, high[0]);
23215 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23216 s = gen_rtx_NE (QImode, flags, const0_rtx);
23217 emit_insn (gen_rtx_SET (d, s));
23220 /* Otherwise, we can get the same results by manually performing
23221 a bit extract operation on bit 5/6, and then performing the two
23222 shifts.  The two methods of getting 0/1 into low/high are exactly
23223 the same size.  Avoiding the shift in the bit-extract case helps
23224 Pentium 4 a bit; no one else seems to care much either way.  */
23225 else
23227 machine_mode half_mode;
23228 rtx (*gen_lshr3)(rtx, rtx, rtx);
23229 rtx (*gen_and3)(rtx, rtx, rtx);
23230 rtx (*gen_xor3)(rtx, rtx, rtx);
23231 HOST_WIDE_INT bits;
23232 rtx x;
23234 if (mode == DImode)
23236 half_mode = SImode;
23237 gen_lshr3 = gen_lshrsi3;
23238 gen_and3 = gen_andsi3;
23239 gen_xor3 = gen_xorsi3;
23240 bits = 5;
23242 else
23244 half_mode = DImode;
23245 gen_lshr3 = gen_lshrdi3;
23246 gen_and3 = gen_anddi3;
23247 gen_xor3 = gen_xordi3;
23248 bits = 6;
23251 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23252 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23253 else
23254 x = gen_lowpart (half_mode, operands[2]);
23255 emit_insn (gen_rtx_SET (high[0], x));
23257 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23258 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23259 emit_move_insn (low[0], high[0]);
23260 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23263 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23264 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23265 return;
23268 if (operands[1] == constm1_rtx)
23270 /* For -1 << N, we can avoid the shld instruction, because we
23271 know that we're shifting 0...31/63 ones into a -1. */
23272 emit_move_insn (low[0], constm1_rtx);
23273 if (optimize_insn_for_size_p ())
23274 emit_move_insn (high[0], low[0]);
23275 else
23276 emit_move_insn (high[0], constm1_rtx);
23278 else
23280 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23282 if (!rtx_equal_p (operands[0], operands[1]))
23283 emit_move_insn (operands[0], operands[1]);
23285 split_double_mode (mode, operands, 1, low, high);
23286 emit_insn (gen_shld (high[0], low[0], operands[2]));
23289 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23291 if (TARGET_CMOVE && scratch)
23293 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23294 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23296 ix86_expand_clear (scratch);
23297 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23299 else
23301 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23302 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23304 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
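/* Editor's note (sketch, not part of the original file): for a double-word
   left shift the code above emits shld/shl plus an adjustment for counts
   >= half_width, since the hardware masks the count.  A C sketch of the
   combined effect for a DImode value held in two SImode halves on a
   32-bit target; names are illustrative.  */
#if 0
#include <stdint.h>

static void
ashl_double_sketch (uint32_t *lo, uint32_t *hi, unsigned count)
{
  count &= 63;                 /* DImode shift counts are masked to 6 bits */
  if (count >= 32)
    {
      *hi = *lo << (count - 32);  /* adjustment step: low moves into high */
      *lo = 0;
    }
  else if (count > 0)
    {
      /* shld semantics: high takes bits shifted out of low.  */
      *hi = (*hi << count) | (*lo >> (32 - count));
      *lo <<= count;
    }
}
#endif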
23308 void
23309 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23311 rtx (*gen_ashr3)(rtx, rtx, rtx)
23312 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23313 rtx (*gen_shrd)(rtx, rtx, rtx);
23314 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23316 rtx low[2], high[2];
23317 int count;
23319 if (CONST_INT_P (operands[2]))
23321 split_double_mode (mode, operands, 2, low, high);
23322 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23324 if (count == GET_MODE_BITSIZE (mode) - 1)
23326 emit_move_insn (high[0], high[1]);
23327 emit_insn (gen_ashr3 (high[0], high[0],
23328 GEN_INT (half_width - 1)));
23329 emit_move_insn (low[0], high[0]);
23332 else if (count >= half_width)
23334 emit_move_insn (low[0], high[1]);
23335 emit_move_insn (high[0], low[0]);
23336 emit_insn (gen_ashr3 (high[0], high[0],
23337 GEN_INT (half_width - 1)));
23339 if (count > half_width)
23340 emit_insn (gen_ashr3 (low[0], low[0],
23341 GEN_INT (count - half_width)));
23343 else
23345 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23347 if (!rtx_equal_p (operands[0], operands[1]))
23348 emit_move_insn (operands[0], operands[1]);
23350 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23351 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23354 else
23356 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23358 if (!rtx_equal_p (operands[0], operands[1]))
23359 emit_move_insn (operands[0], operands[1]);
23361 split_double_mode (mode, operands, 1, low, high);
23363 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23364 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23366 if (TARGET_CMOVE && scratch)
23368 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23369 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23371 emit_move_insn (scratch, high[0]);
23372 emit_insn (gen_ashr3 (scratch, scratch,
23373 GEN_INT (half_width - 1)));
23374 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23375 scratch));
23377 else
23379 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23380 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23382 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
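/* Editor's note (sketch, not part of the original file): the arithmetic
   right shift splitter above pairs shrd with sar, filling the high half
   with sign bits; for counts >= half_width the high half collapses to the
   sign mask.  A C sketch of the effect, illustrative names only: */
#if 0
#include <stdint.h>

static void
ashr_double_sketch (uint32_t *lo, uint32_t *hi, unsigned count)
{
  count &= 63;
  if (count >= 32)
    {
      *lo = (uint32_t) ((int32_t) *hi >> (count - 32)); /* adjustment step */
      *hi = (uint32_t) ((int32_t) *hi >> 31);           /* sign fill       */
    }
  else if (count > 0)
    {
      /* shrd semantics: low takes bits shifted out of high.  */
      *lo = (*lo >> count) | (*hi << (32 - count));
      *hi = (uint32_t) ((int32_t) *hi >> count);
    }
}
#endif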
23387 void
23388 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23390 rtx (*gen_lshr3)(rtx, rtx, rtx)
23391 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23392 rtx (*gen_shrd)(rtx, rtx, rtx);
23393 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23395 rtx low[2], high[2];
23396 int count;
23398 if (CONST_INT_P (operands[2]))
23400 split_double_mode (mode, operands, 2, low, high);
23401 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23403 if (count >= half_width)
23405 emit_move_insn (low[0], high[1]);
23406 ix86_expand_clear (high[0]);
23408 if (count > half_width)
23409 emit_insn (gen_lshr3 (low[0], low[0],
23410 GEN_INT (count - half_width)));
23412 else
23414 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23416 if (!rtx_equal_p (operands[0], operands[1]))
23417 emit_move_insn (operands[0], operands[1]);
23419 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23420 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23423 else
23425 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23427 if (!rtx_equal_p (operands[0], operands[1]))
23428 emit_move_insn (operands[0], operands[1]);
23430 split_double_mode (mode, operands, 1, low, high);
23432 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23433 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23435 if (TARGET_CMOVE && scratch)
23437 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23438 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23440 ix86_expand_clear (scratch);
23441 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23442 scratch));
23444 else
23446 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23447 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23449 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23454 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
23455 static void
23456 predict_jump (int prob)
23458 rtx insn = get_last_insn ();
23459 gcc_assert (JUMP_P (insn));
23460 add_int_reg_note (insn, REG_BR_PROB, prob);
23463 /* Helper function for the string operations below.  Test whether VARIABLE
23464 is aligned to VALUE bytes.  If so, jump to the label.  */
23465 static rtx_code_label *
23466 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23468 rtx_code_label *label = gen_label_rtx ();
23469 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23470 if (GET_MODE (variable) == DImode)
23471 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23472 else
23473 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23474 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23475 1, label);
23476 if (epilogue)
23477 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23478 else
23479 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23480 return label;
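/* Editor's note (sketch, not part of the original file): the helper above
   emits the equivalent of "test VARIABLE, VALUE; je label", so the branch
   is taken when the tested bits of VARIABLE are all zero.  C model: */
#if 0
#include <stdint.h>

static int
aligntest_sketch (uintptr_t variable, int value)
{
  /* Nonzero (jump to the label) when VARIABLE has none of the bits in
     VALUE set; callers pass a single power-of-two bit.  */
  return (variable & (uintptr_t) value) == 0;
}
#endif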
23483 /* Decrease COUNTREG by VALUE.  */
23484 static void
23485 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23487 rtx (*gen_add)(rtx, rtx, rtx)
23488 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23490 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23493 /* Zero-extend a possibly-SImode EXP to a Pmode register.  */
23494 rtx
23495 ix86_zero_extend_to_Pmode (rtx exp)
23497 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23500 /* Divide COUNTREG by SCALE. */
23501 static rtx
23502 scale_counter (rtx countreg, int scale)
23504 rtx sc;
23506 if (scale == 1)
23507 return countreg;
23508 if (CONST_INT_P (countreg))
23509 return GEN_INT (INTVAL (countreg) / scale);
23510 gcc_assert (REG_P (countreg));
23512 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23513 GEN_INT (exact_log2 (scale)),
23514 NULL, 1, OPTAB_DIRECT);
23515 return sc;
23518 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23519 DImode for constant loop counts. */
23521 static machine_mode
23522 counter_mode (rtx count_exp)
23524 if (GET_MODE (count_exp) != VOIDmode)
23525 return GET_MODE (count_exp);
23526 if (!CONST_INT_P (count_exp))
23527 return Pmode;
23528 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23529 return DImode;
23530 return SImode;
23533 /* Copy the address to a Pmode register.  This is used for x32 to
23534 truncate a DImode TLS address to a SImode register.  */
23536 static rtx
23537 ix86_copy_addr_to_reg (rtx addr)
23539 rtx reg;
23540 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23542 reg = copy_addr_to_reg (addr);
23543 REG_POINTER (reg) = 1;
23544 return reg;
23546 else
23548 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23549 reg = copy_to_mode_reg (DImode, addr);
23550 REG_POINTER (reg) = 1;
23551 return gen_rtx_SUBREG (SImode, reg, 0);
23555 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
23556 SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times; the overall
23557 size is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the
23558 equivalent loop to set memory with VALUE (supposed to be in MODE).
23560 The size is rounded down to a whole number of chunks moved at once.
23561 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
23564 static void
23565 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23566 rtx destptr, rtx srcptr, rtx value,
23567 rtx count, machine_mode mode, int unroll,
23568 int expected_size, bool issetmem)
23570 rtx_code_label *out_label, *top_label;
23571 rtx iter, tmp;
23572 machine_mode iter_mode = counter_mode (count);
23573 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23574 rtx piece_size = GEN_INT (piece_size_n);
23575 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23576 rtx size;
23577 int i;
23579 top_label = gen_label_rtx ();
23580 out_label = gen_label_rtx ();
23581 iter = gen_reg_rtx (iter_mode);
23583 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23584 NULL, 1, OPTAB_DIRECT);
23585 /* Those two should combine. */
23586 if (piece_size == const1_rtx)
23588 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23589 true, out_label);
23590 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23592 emit_move_insn (iter, const0_rtx);
23594 emit_label (top_label);
23596 tmp = convert_modes (Pmode, iter_mode, iter, true);
23598 /* This assert could be relaxed - in this case we'll need to compute
23599 the smallest power of two containing PIECE_SIZE_N and pass it to
23600 offset_address.  */
23601 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23602 destmem = offset_address (destmem, tmp, piece_size_n);
23603 destmem = adjust_address (destmem, mode, 0);
23605 if (!issetmem)
23607 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23608 srcmem = adjust_address (srcmem, mode, 0);
23610 /* When unrolling for chips that reorder memory reads and writes,
23611 we can save registers by using a single temporary.
23612 Also, using 4 temporaries is overkill in 32-bit mode.  */
23613 if (!TARGET_64BIT && 0)
23615 for (i = 0; i < unroll; i++)
23617 if (i)
23619 destmem =
23620 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23621 srcmem =
23622 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23624 emit_move_insn (destmem, srcmem);
23627 else
23629 rtx tmpreg[4];
23630 gcc_assert (unroll <= 4);
23631 for (i = 0; i < unroll; i++)
23633 tmpreg[i] = gen_reg_rtx (mode);
23634 if (i)
23636 srcmem =
23637 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23639 emit_move_insn (tmpreg[i], srcmem);
23641 for (i = 0; i < unroll; i++)
23643 if (i)
23645 destmem =
23646 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23648 emit_move_insn (destmem, tmpreg[i]);
23652 else
23653 for (i = 0; i < unroll; i++)
23655 if (i)
23656 destmem =
23657 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23658 emit_move_insn (destmem, value);
23661 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23662 true, OPTAB_LIB_WIDEN);
23663 if (tmp != iter)
23664 emit_move_insn (iter, tmp);
23666 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23667 true, top_label);
23668 if (expected_size != -1)
23670 expected_size /= GET_MODE_SIZE (mode) * unroll;
23671 if (expected_size == 0)
23672 predict_jump (0);
23673 else if (expected_size > REG_BR_PROB_BASE)
23674 predict_jump (REG_BR_PROB_BASE - 1);
23675 else
23676 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23678 else
23679 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23680 iter = ix86_zero_extend_to_Pmode (iter);
23681 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23682 true, OPTAB_LIB_WIDEN);
23683 if (tmp != destptr)
23684 emit_move_insn (destptr, tmp);
23685 if (!issetmem)
23687 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23688 true, OPTAB_LIB_WIDEN);
23689 if (tmp != srcptr)
23690 emit_move_insn (srcptr, tmp);
23692 emit_label (out_label);
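/* Editor's note (sketch, not part of the original file): a value-level C
   sketch of the copy loop emitted above for the !ISSETMEM case, assuming
   MODE == SImode and UNROLL == 2 (an 8-byte piece).  Names are
   illustrative only.  */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
movmem_via_loop_sketch (char *dest, const char *src, size_t count)
{
  const size_t piece = 8;              /* GET_MODE_SIZE (mode) * unroll */
  size_t size = count & ~(piece - 1);  /* piece_size_mask applied       */
  for (size_t iter = 0; iter < size; iter += piece)
    {
      uint32_t t0, t1;
      /* Load all temporaries first, then store, mirroring the tmpreg[]
	 scheme above.  */
      memcpy (&t0, src + iter, 4);
      memcpy (&t1, src + iter + 4, 4);
      memcpy (dest + iter, &t0, 4);
      memcpy (dest + iter + 4, &t1, 4);
    }
  /* The emitted code finally advances DESTPTR and SRCPTR by SIZE.  */
}
#endif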
23695 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23696 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23697 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23698 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23699 ORIG_VALUE is the original value passed to memset to fill the memory with.
23700 Other arguments have the same meaning as for the previous function.  */
23702 static void
23703 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23704 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23705 rtx count,
23706 machine_mode mode, bool issetmem)
23708 rtx destexp;
23709 rtx srcexp;
23710 rtx countreg;
23711 HOST_WIDE_INT rounded_count;
23713 /* If possible, it is shorter to use rep movs.
23714 TODO: Maybe it is better to move this logic to decide_alg. */
23715 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23716 && (!issetmem || orig_value == const0_rtx))
23717 mode = SImode;
23719 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23720 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23722 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23723 GET_MODE_SIZE (mode)));
23724 if (mode != QImode)
23726 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23727 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23728 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23730 else
23731 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23732 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23734 rounded_count = (INTVAL (count)
23735 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23736 destmem = shallow_copy_rtx (destmem);
23737 set_mem_size (destmem, rounded_count);
23739 else if (MEM_SIZE_KNOWN_P (destmem))
23740 clear_mem_size (destmem);
23742 if (issetmem)
23744 value = force_reg (mode, gen_lowpart (mode, value));
23745 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23747 else
23749 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23750 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23751 if (mode != QImode)
23753 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23754 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23755 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23757 else
23758 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23759 if (CONST_INT_P (count))
23761 rounded_count = (INTVAL (count)
23762 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23763 srcmem = shallow_copy_rtx (srcmem);
23764 set_mem_size (srcmem, rounded_count);
23766 else
23768 if (MEM_SIZE_KNOWN_P (srcmem))
23769 clear_mem_size (srcmem);
23771 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23772 destexp, srcexp));
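/* Editor's note (sketch, not part of the original file): DESTEXP/SRCEXP
   above encode the final pointer values, pointer + count * chunk size.  A
   C model of what the emitted "rep movs" leaves behind; names are
   illustrative only.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
rep_movs_model (char **destptr, const char **srcptr, size_t countreg,
		size_t chunk)
{
  memcpy (*destptr, *srcptr, countreg * chunk);
  *destptr += countreg * chunk;  /* destexp: destptr + (countreg << log2) */
  *srcptr += countreg * chunk;   /* srcexp, analogously                   */
}
#endif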
23776 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23777 DESTMEM.
23778 SRCMEM is passed by pointer to be updated on return.
23779 Return value is the updated DST.  */
23780 static rtx
23781 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23782 HOST_WIDE_INT size_to_move)
23784 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23785 enum insn_code code;
23786 machine_mode move_mode;
23787 int piece_size, i;
23789 /* Find the widest mode in which we could perform moves.
23790 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and
23791 halve it until a move of that size is supported.  */
23792 piece_size = 1 << floor_log2 (size_to_move);
23793 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23794 code = optab_handler (mov_optab, move_mode);
23795 while (code == CODE_FOR_nothing && piece_size > 1)
23797 piece_size >>= 1;
23798 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23799 code = optab_handler (mov_optab, move_mode);
23802 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23803 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23804 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23806 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23807 move_mode = mode_for_vector (word_mode, nunits);
23808 code = optab_handler (mov_optab, move_mode);
23809 if (code == CODE_FOR_nothing)
23811 move_mode = word_mode;
23812 piece_size = GET_MODE_SIZE (move_mode);
23813 code = optab_handler (mov_optab, move_mode);
23816 gcc_assert (code != CODE_FOR_nothing);
23818 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23819 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23821 /* Emit moves.  We'll need SIZE_TO_MOVE / PIECE_SIZE moves.  */
23822 gcc_assert (size_to_move % piece_size == 0);
23823 adjust = GEN_INT (piece_size);
23824 for (i = 0; i < size_to_move; i += piece_size)
23826 /* We move from memory to memory, so we'll need to do it via
23827 a temporary register. */
23828 tempreg = gen_reg_rtx (move_mode);
23829 emit_insn (GEN_FCN (code) (tempreg, src));
23830 emit_insn (GEN_FCN (code) (dst, tempreg));
23832 emit_move_insn (destptr,
23833 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23834 emit_move_insn (srcptr,
23835 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23837 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23838 piece_size);
23839 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23840 piece_size);
23843 /* Update DST and SRC rtx. */
23844 *srcmem = src;
23845 return dst;
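/* Editor's note (sketch, not part of the original file): the helper above
   picks the widest supported move width no larger than SIZE_TO_MOVE and
   copies through a temporary register.  A hedged sketch of the width
   selection; MAX_SUPPORTED stands in for the optab query.  */
#if 0
static int
widest_piece_sketch (int size_to_move, int max_supported)
{
  /* 1 << floor_log2 (size_to_move): the largest power of 2 not above the
     size, then halved until the target supports a move of that width.  */
  int piece = 1;
  while (piece * 2 <= size_to_move)
    piece *= 2;
  while (piece > max_supported)
    piece >>= 1;
  return piece;
}
#endif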
23848 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23849 static void
23850 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23851 rtx destptr, rtx srcptr, rtx count, int max_size)
23853 rtx src, dest;
23854 if (CONST_INT_P (count))
23856 HOST_WIDE_INT countval = INTVAL (count);
23857 HOST_WIDE_INT epilogue_size = countval % max_size;
23858 int i;
23860 /* For now MAX_SIZE should be a power of 2.  This assert could be
23861 relaxed, but it'll require a bit more complicated epilogue
23862 expansion.  */
23863 gcc_assert ((max_size & (max_size - 1)) == 0);
23864 for (i = max_size; i >= 1; i >>= 1)
23866 if (epilogue_size & i)
23867 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23869 return;
23871 if (max_size > 8)
23873 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23874 count, 1, OPTAB_DIRECT);
23875 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23876 count, QImode, 1, 4, false);
23877 return;
23880 /* When there are stringops, we can cheaply increase dest and src pointers.
23881 Otherwise we save code size by maintaining offset (zero is readily
23882 available from the preceding rep operation) and using x86 addressing
23883 modes.  */
23884 if (TARGET_SINGLE_STRINGOP)
23886 if (max_size > 4)
23888 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23889 src = change_address (srcmem, SImode, srcptr);
23890 dest = change_address (destmem, SImode, destptr);
23891 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23892 emit_label (label);
23893 LABEL_NUSES (label) = 1;
23895 if (max_size > 2)
23897 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23898 src = change_address (srcmem, HImode, srcptr);
23899 dest = change_address (destmem, HImode, destptr);
23900 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23901 emit_label (label);
23902 LABEL_NUSES (label) = 1;
23904 if (max_size > 1)
23906 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23907 src = change_address (srcmem, QImode, srcptr);
23908 dest = change_address (destmem, QImode, destptr);
23909 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23910 emit_label (label);
23911 LABEL_NUSES (label) = 1;
23914 else
23916 rtx offset = force_reg (Pmode, const0_rtx);
23917 rtx tmp;
23919 if (max_size > 4)
23921 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23922 src = change_address (srcmem, SImode, srcptr);
23923 dest = change_address (destmem, SImode, destptr);
23924 emit_move_insn (dest, src);
23925 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23926 true, OPTAB_LIB_WIDEN);
23927 if (tmp != offset)
23928 emit_move_insn (offset, tmp);
23929 emit_label (label);
23930 LABEL_NUSES (label) = 1;
23932 if (max_size > 2)
23934 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23935 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23936 src = change_address (srcmem, HImode, tmp);
23937 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23938 dest = change_address (destmem, HImode, tmp);
23939 emit_move_insn (dest, src);
23940 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23941 true, OPTAB_LIB_WIDEN);
23942 if (tmp != offset)
23943 emit_move_insn (offset, tmp);
23944 emit_label (label);
23945 LABEL_NUSES (label) = 1;
23947 if (max_size > 1)
23949 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23950 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23951 src = change_address (srcmem, QImode, tmp);
23952 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23953 dest = change_address (destmem, QImode, tmp);
23954 emit_move_insn (dest, src);
23955 emit_label (label);
23956 LABEL_NUSES (label) = 1;
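/* Editor's note (sketch, not part of the original file): for a constant
   COUNT the epilogue above decomposes COUNT % MAX_SIZE into its set bits,
   emitting one move per power of two.  A hedged C sketch: */
#if 0
#include <stddef.h>
#include <string.h>

static void
movmem_epilogue_sketch (char *dest, const char *src, size_t countval,
			int max_size)
{
  size_t epilogue_size = countval % (size_t) max_size;
  size_t done = 0;
  for (int i = max_size; i >= 1; i >>= 1)
    if (epilogue_size & (size_t) i)
      {
	/* emit_memmov copies i bytes and advances the pointers.  */
	memcpy (dest + done, src + done, (size_t) i);
	done += (size_t) i;
      }
}
#endif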
23961 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23962 with the value PROMOTED_VAL.
23963 Unlike emit_memmov above, there is no source pointer to update.
23964 Return value is the updated DST.  */
23965 static rtx
23966 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23967 HOST_WIDE_INT size_to_move)
23969 rtx dst = destmem, adjust;
23970 enum insn_code code;
23971 machine_mode move_mode;
23972 int piece_size, i;
23974 /* Find the widest mode in which we could perform moves.
23975 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and
23976 halve it until a move of that size is supported.  */
23977 move_mode = GET_MODE (promoted_val);
23978 if (move_mode == VOIDmode)
23979 move_mode = QImode;
23980 if (size_to_move < GET_MODE_SIZE (move_mode))
23982 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23983 promoted_val = gen_lowpart (move_mode, promoted_val);
23985 piece_size = GET_MODE_SIZE (move_mode);
23986 code = optab_handler (mov_optab, move_mode);
23987 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23989 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23991 /* Emit moves.  We'll need SIZE_TO_MOVE / PIECE_SIZE moves.  */
23992 gcc_assert (size_to_move % piece_size == 0);
23993 adjust = GEN_INT (piece_size);
23994 for (i = 0; i < size_to_move; i += piece_size)
23996 if (piece_size <= GET_MODE_SIZE (word_mode))
23998 emit_insn (gen_strset (destptr, dst, promoted_val));
23999 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24000 piece_size);
24001 continue;
24004 emit_insn (GEN_FCN (code) (dst, promoted_val));
24006 emit_move_insn (destptr,
24007 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
24009 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24010 piece_size);
24013 /* Update DST rtx. */
24014 return dst;
24016 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
24017 static void
24018 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
24019 rtx count, int max_size)
24021 count =
24022 expand_simple_binop (counter_mode (count), AND, count,
24023 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
24024 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
24025 gen_lowpart (QImode, value), count, QImode,
24026 1, max_size / 2, true);
24029 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
24030 static void
24031 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
24032 rtx count, int max_size)
24034 rtx dest;
24036 if (CONST_INT_P (count))
24038 HOST_WIDE_INT countval = INTVAL (count);
24039 HOST_WIDE_INT epilogue_size = countval % max_size;
24040 int i;
24042 /* For now MAX_SIZE should be a power of 2.  This assert could be
24043 relaxed, but it'll require a bit more complicated epilogue
24044 expansion.  */
24045 gcc_assert ((max_size & (max_size - 1)) == 0);
24046 for (i = max_size; i >= 1; i >>= 1)
24048 if (epilogue_size & i)
24050 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24051 destmem = emit_memset (destmem, destptr, vec_value, i);
24052 else
24053 destmem = emit_memset (destmem, destptr, value, i);
24056 return;
24058 if (max_size > 32)
24060 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
24061 return;
24063 if (max_size > 16)
24065 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
24066 if (TARGET_64BIT)
24068 dest = change_address (destmem, DImode, destptr);
24069 emit_insn (gen_strset (destptr, dest, value));
24070 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
24071 emit_insn (gen_strset (destptr, dest, value));
24073 else
24075 dest = change_address (destmem, SImode, destptr);
24076 emit_insn (gen_strset (destptr, dest, value));
24077 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24078 emit_insn (gen_strset (destptr, dest, value));
24079 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24080 emit_insn (gen_strset (destptr, dest, value));
24081 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24082 emit_insn (gen_strset (destptr, dest, value));
24084 emit_label (label);
24085 LABEL_NUSES (label) = 1;
24087 if (max_size > 8)
24089 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24090 if (TARGET_64BIT)
24092 dest = change_address (destmem, DImode, destptr);
24093 emit_insn (gen_strset (destptr, dest, value));
24095 else
24097 dest = change_address (destmem, SImode, destptr);
24098 emit_insn (gen_strset (destptr, dest, value));
24099 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24100 emit_insn (gen_strset (destptr, dest, value));
24102 emit_label (label);
24103 LABEL_NUSES (label) = 1;
24105 if (max_size > 4)
24107 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24108 dest = change_address (destmem, SImode, destptr);
24109 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24110 emit_label (label);
24111 LABEL_NUSES (label) = 1;
24113 if (max_size > 2)
24115 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24116 dest = change_address (destmem, HImode, destptr);
24117 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24118 emit_label (label);
24119 LABEL_NUSES (label) = 1;
24121 if (max_size > 1)
24123 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24124 dest = change_address (destmem, QImode, destptr);
24125 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24126 emit_label (label);
24127 LABEL_NUSES (label) = 1;
24131 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or set
24132 enough bytes of DESTMEM, to align it to DESIRED_ALIGNMENT.  The original
24133 alignment is ALIGN.  Depending on ISSETMEM, either the arguments
24134 SRCMEM/SRCPTR or VALUE/VEC_VALUE are ignored.
24135 Return value is the updated DESTMEM.  */
24136 static rtx
24137 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24138 rtx destptr, rtx srcptr, rtx value,
24139 rtx vec_value, rtx count, int align,
24140 int desired_alignment, bool issetmem)
24142 int i;
24143 for (i = 1; i < desired_alignment; i <<= 1)
24145 if (align <= i)
24147 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24148 if (issetmem)
24150 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24151 destmem = emit_memset (destmem, destptr, vec_value, i);
24152 else
24153 destmem = emit_memset (destmem, destptr, value, i);
24155 else
24156 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24157 ix86_adjust_counter (count, i);
24158 emit_label (label);
24159 LABEL_NUSES (label) = 1;
24160 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24163 return destmem;
24166 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24167 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24168 and jump to DONE_LABEL.  */
24169 static void
24170 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24171 rtx destptr, rtx srcptr,
24172 rtx value, rtx vec_value,
24173 rtx count, int size,
24174 rtx done_label, bool issetmem)
24176 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24177 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24178 rtx modesize;
24179 int n;
24181 /* If we do not have a vector value to copy, we must reduce the size.  */
24182 if (issetmem)
24184 if (!vec_value)
24186 if (GET_MODE (value) == VOIDmode && size > 8)
24187 mode = Pmode;
24188 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24189 mode = GET_MODE (value);
24191 else
24192 mode = GET_MODE (vec_value), value = vec_value;
24194 else
24196 /* Choose appropriate vector mode. */
24197 if (size >= 32)
24198 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24199 else if (size >= 16)
24200 mode = TARGET_SSE ? V16QImode : DImode;
24201 srcmem = change_address (srcmem, mode, srcptr);
24203 destmem = change_address (destmem, mode, destptr);
24204 modesize = GEN_INT (GET_MODE_SIZE (mode));
24205 gcc_assert (GET_MODE_SIZE (mode) <= size);
24206 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24208 if (issetmem)
24209 emit_move_insn (destmem, gen_lowpart (mode, value));
24210 else
24212 emit_move_insn (destmem, srcmem);
24213 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24215 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24218 destmem = offset_address (destmem, count, 1);
24219 destmem = offset_address (destmem, GEN_INT (-2 * size),
24220 GET_MODE_SIZE (mode));
24221 if (!issetmem)
24223 srcmem = offset_address (srcmem, count, 1);
24224 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24225 GET_MODE_SIZE (mode));
24227 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24229 if (issetmem)
24230 emit_move_insn (destmem, gen_lowpart (mode, value));
24231 else
24233 emit_move_insn (destmem, srcmem);
24234 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24236 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24238 emit_jump_insn (gen_jump (done_label));
24239 emit_barrier ();
24241 emit_label (label);
24242 LABEL_NUSES (label) = 1;
24245 /* Handle a small memcpy (up to SIZE, which is supposed to be a small
24246 power of 2) and get ready for the main memcpy loop by copying the
24247 initial DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
24248 DESTPTR/SRCPTR/COUNT so that we can proceed with a loop copying SIZE
24249 bytes at once.  Do moves in MODE.
24250 DONE_LABEL is a label after the whole copying sequence.  The label
24251 is created on demand if *DONE_LABEL is NULL.
24252 MIN_SIZE is the minimal size of the block copied; it gets adjusted
24254 for new bounds after the initial copies.
24255 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24256 DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether we will dispatch to a library call for large blocks.
24258 In pseudocode we do:
24260 if (COUNT < SIZE)
24262 Assume that SIZE is 4. Bigger sizes are handled analogously
24263 if (COUNT & 4)
24265 copy 4 bytes from SRCPTR to DESTPTR
24266 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24267 goto done_label
24269 if (!COUNT)
24270 goto done_label;
24271 copy 1 byte from SRCPTR to DESTPTR
24272 if (COUNT & 2)
24274 copy 2 bytes from SRCPTR to DESTPTR
24275 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24278 else
24280 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24281 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24283 OLD_DESTPTR = DESTPTR;
24284 Align DESTPTR up to DESIRED_ALIGN
24285 SRCPTR += DESTPTR - OLD_DESTPTR
24286 COUNT -= DESTPTR - OLD_DESTPTR
24287 if (DYNAMIC_CHECK)
24288 Round COUNT down to multiple of SIZE
24289 << optional caller-supplied zero size guard is here >>
24290 << optional caller-supplied dynamic check is here >>
24291 << caller supplied main copy loop is here >>
24293 done_label:
24295 static void
24296 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24297 rtx *destptr, rtx *srcptr,
24298 machine_mode mode,
24299 rtx value, rtx vec_value,
24300 rtx *count,
24301 rtx_code_label **done_label,
24302 int size,
24303 int desired_align,
24304 int align,
24305 unsigned HOST_WIDE_INT *min_size,
24306 bool dynamic_check,
24307 bool issetmem)
24309 rtx_code_label *loop_label = NULL, *label;
24310 int n;
24311 rtx modesize;
24312 int prolog_size = 0;
24313 rtx mode_value;
24315 /* Choose the proper value to copy.  */
24316 if (issetmem && VECTOR_MODE_P (mode))
24317 mode_value = vec_value;
24318 else
24319 mode_value = value;
24320 gcc_assert (GET_MODE_SIZE (mode) <= size);
24322 /* See if the block is big or small; handle the small blocks.  */
24323 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24325 int size2 = size;
24326 loop_label = gen_label_rtx ();
24328 if (!*done_label)
24329 *done_label = gen_label_rtx ();
24331 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24332 1, loop_label);
24333 size2 >>= 1;
24335 /* Handle sizes > 3. */
24336 for (;size2 > 2; size2 >>= 1)
24337 expand_small_movmem_or_setmem (destmem, srcmem,
24338 *destptr, *srcptr,
24339 value, vec_value,
24340 *count,
24341 size2, *done_label, issetmem);
24342 /* Nothing to copy?  Jump to DONE_LABEL if so.  */
24343 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24344 1, *done_label);
24346 /* Do a byte copy. */
24347 destmem = change_address (destmem, QImode, *destptr);
24348 if (issetmem)
24349 emit_move_insn (destmem, gen_lowpart (QImode, value));
24350 else
24352 srcmem = change_address (srcmem, QImode, *srcptr);
24353 emit_move_insn (destmem, srcmem);
24356 /* Handle sizes 2 and 3. */
24357 label = ix86_expand_aligntest (*count, 2, false);
24358 destmem = change_address (destmem, HImode, *destptr);
24359 destmem = offset_address (destmem, *count, 1);
24360 destmem = offset_address (destmem, GEN_INT (-2), 2);
24361 if (issetmem)
24362 emit_move_insn (destmem, gen_lowpart (HImode, value));
24363 else
24365 srcmem = change_address (srcmem, HImode, *srcptr);
24366 srcmem = offset_address (srcmem, *count, 1);
24367 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24368 emit_move_insn (destmem, srcmem);
24371 emit_label (label);
24372 LABEL_NUSES (label) = 1;
24373 emit_jump_insn (gen_jump (*done_label));
24374 emit_barrier ();
24376 else
24377 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24378 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24380 /* Start memcpy for COUNT >= SIZE. */
24381 if (loop_label)
24383 emit_label (loop_label);
24384 LABEL_NUSES (loop_label) = 1;
24387 /* Copy first desired_align bytes. */
24388 if (!issetmem)
24389 srcmem = change_address (srcmem, mode, *srcptr);
24390 destmem = change_address (destmem, mode, *destptr);
24391 modesize = GEN_INT (GET_MODE_SIZE (mode));
24392 for (n = 0; prolog_size < desired_align - align; n++)
24394 if (issetmem)
24395 emit_move_insn (destmem, mode_value);
24396 else
24398 emit_move_insn (destmem, srcmem);
24399 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24401 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24402 prolog_size += GET_MODE_SIZE (mode);
24406 /* Copy last SIZE bytes. */
24407 destmem = offset_address (destmem, *count, 1);
24408 destmem = offset_address (destmem,
24409 GEN_INT (-size - prolog_size),
24411 if (issetmem)
24412 emit_move_insn (destmem, mode_value);
24413 else
24415 srcmem = offset_address (srcmem, *count, 1);
24416 srcmem = offset_address (srcmem,
24417 GEN_INT (-size - prolog_size),
24419 emit_move_insn (destmem, srcmem);
24421 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24423 destmem = offset_address (destmem, modesize, 1);
24424 if (issetmem)
24425 emit_move_insn (destmem, mode_value);
24426 else
24428 srcmem = offset_address (srcmem, modesize, 1);
24429 emit_move_insn (destmem, srcmem);
24433 /* Align destination. */
24434 if (desired_align > 1 && desired_align > align)
24436 rtx saveddest = *destptr;
24438 gcc_assert (desired_align <= size);
24439 /* Align destptr up, placing it in a new register.  */
24440 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24441 GEN_INT (prolog_size),
24442 NULL_RTX, 1, OPTAB_DIRECT);
24443 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24444 REG_POINTER (*destptr) = 1;
24445 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24446 GEN_INT (-desired_align),
24447 *destptr, 1, OPTAB_DIRECT);
24448 /* See how many bytes we skipped. */
24449 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24450 *destptr,
24451 saveddest, 1, OPTAB_DIRECT);
24452 /* Adjust srcptr and count. */
24453 if (!issetmem)
24454 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24455 saveddest, *srcptr, 1, OPTAB_DIRECT);
24456 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24457 saveddest, *count, 1, OPTAB_DIRECT);
24458 /* We copied at most size + prolog_size. */
24459 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24460 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24461 else
24462 *min_size = 0;
24464 /* Our loops always round down the block size, but for dispatch to the
24465 library we need the precise value.  */
24466 if (dynamic_check)
24467 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24468 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24470 else
24472 gcc_assert (prolog_size == 0);
24473 /* Decrease the count, so we won't end up copying the last word twice.  */
24474 if (!CONST_INT_P (*count))
24475 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24476 constm1_rtx, *count, 1, OPTAB_DIRECT);
24477 else
24478 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24479 if (*min_size)
24480 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24485 /* This function is like the previous one, except here we know how many bytes
24486 need to be copied. That allows us to update alignment not only of DST, which
24487 is returned, but also of SRC, which is passed as a pointer for that
24488 reason. */
24489 static rtx
24490 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24491 rtx srcreg, rtx value, rtx vec_value,
24492 int desired_align, int align_bytes,
24493 bool issetmem)
24495 rtx src = NULL;
24496 rtx orig_dst = dst;
24497 rtx orig_src = NULL;
24498 int piece_size = 1;
24499 int copied_bytes = 0;
24501 if (!issetmem)
24503 gcc_assert (srcp != NULL);
24504 src = *srcp;
24505 orig_src = src;
24508 for (piece_size = 1;
24509 piece_size <= desired_align && copied_bytes < align_bytes;
24510 piece_size <<= 1)
24512 if (align_bytes & piece_size)
24514 if (issetmem)
24516 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24517 dst = emit_memset (dst, destreg, vec_value, piece_size);
24518 else
24519 dst = emit_memset (dst, destreg, value, piece_size);
24521 else
24522 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24523 copied_bytes += piece_size;
24526 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24527 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24528 if (MEM_SIZE_KNOWN_P (orig_dst))
24529 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24531 if (!issetmem)
24533 int src_align_bytes = get_mem_align_offset (src, desired_align
24534 * BITS_PER_UNIT);
24535 if (src_align_bytes >= 0)
24536 src_align_bytes = desired_align - src_align_bytes;
24537 if (src_align_bytes >= 0)
24539 unsigned int src_align;
24540 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24542 if ((src_align_bytes & (src_align - 1))
24543 == (align_bytes & (src_align - 1)))
24544 break;
24546 if (src_align > (unsigned int) desired_align)
24547 src_align = desired_align;
24548 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24549 set_mem_align (src, src_align * BITS_PER_UNIT);
24551 if (MEM_SIZE_KNOWN_P (orig_src))
24552 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24553 *srcp = src;
24556 return dst;
24559 /* Return true if ALG can be used in the current context.
24560 Assume we expand memset if MEMSET is true. */
24561 static bool
24562 alg_usable_p (enum stringop_alg alg, bool memset)
24564 if (alg == no_stringop)
24565 return false;
24566 if (alg == vector_loop)
24567 return TARGET_SSE || TARGET_AVX;
24568 /* Algorithms using the rep prefix want at least edi and ecx;
24569 additionally, memset wants eax and memcpy wants esi. Don't
24570 consider such algorithms if the user has appropriated those
24571 registers for their own purposes. */
24572 if (alg == rep_prefix_1_byte
24573 || alg == rep_prefix_4_byte
24574 || alg == rep_prefix_8_byte)
24575 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24576 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24577 return true;
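/* For example, compiling with -ffixed-ecx makes fixed_regs[CX_REG] nonzero,
   so all of the rep_prefix_* algorithms above become unusable and decide_alg
   has to fall back to the loop-based or libcall variants.  */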
24580 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24581 static enum stringop_alg
24582 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24583 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24584 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24586 const struct stringop_algs * algs;
24587 bool optimize_for_speed;
24588 int max = 0;
24589 const struct processor_costs *cost;
24590 int i;
24591 bool any_alg_usable_p = false;
24593 *noalign = false;
24594 *dynamic_check = -1;
24596 /* Even if the string operation call is cold, we still might spend a lot
24597 of time processing large blocks. */
24598 if (optimize_function_for_size_p (cfun)
24599 || (optimize_insn_for_size_p ()
24600 && (max_size < 256
24601 || (expected_size != -1 && expected_size < 256))))
24602 optimize_for_speed = false;
24603 else
24604 optimize_for_speed = true;
24606 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24607 if (memset)
24608 algs = &cost->memset[TARGET_64BIT != 0];
24609 else
24610 algs = &cost->memcpy[TARGET_64BIT != 0];
24612 /* Find the maximal size covered by a user-defined algorithm. */
24613 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24615 enum stringop_alg candidate = algs->size[i].alg;
24616 bool usable = alg_usable_p (candidate, memset);
24617 any_alg_usable_p |= usable;
24619 if (candidate != libcall && candidate && usable)
24620 max = algs->size[i].max;
24623 /* If the expected size is not known but the max size is small enough
24624 that the inline version is a win, set the expected size into
24625 the range. */
24626 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24627 && expected_size == -1)
24628 expected_size = min_size / 2 + max_size / 2;
24630 /* If the user specified the algorithm, honor it if possible. */
24631 if (ix86_stringop_alg != no_stringop
24632 && alg_usable_p (ix86_stringop_alg, memset))
24633 return ix86_stringop_alg;
24634 /* rep; movq or rep; movl is the smallest variant. */
24635 else if (!optimize_for_speed)
24637 *noalign = true;
24638 if (!count || (count & 3) || (memset && !zero_memset))
24639 return alg_usable_p (rep_prefix_1_byte, memset)
24640 ? rep_prefix_1_byte : loop_1_byte;
24641 else
24642 return alg_usable_p (rep_prefix_4_byte, memset)
24643 ? rep_prefix_4_byte : loop;
24645 /* Very tiny blocks are best handled via the loop; REP is expensive to
24646 set up. */
24647 else if (expected_size != -1 && expected_size < 4)
24648 return loop_1_byte;
24649 else if (expected_size != -1)
24651 enum stringop_alg alg = libcall;
24652 bool alg_noalign = false;
24653 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24655 /* We get here if the algorithms that were not libcall-based
24656 were rep-prefix based and we are unable to use rep prefixes
24657 based on global register usage. Break out of the loop and
24658 use the heuristic below. */
24659 if (algs->size[i].max == 0)
24660 break;
24661 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24663 enum stringop_alg candidate = algs->size[i].alg;
24665 if (candidate != libcall && alg_usable_p (candidate, memset))
24667 alg = candidate;
24668 alg_noalign = algs->size[i].noalign;
24670 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24671 the last non-libcall inline algorithm. */
24672 if (TARGET_INLINE_ALL_STRINGOPS)
24674 /* When the current size is best copied by a libcall,
24675 but we are still forced to inline, run the heuristic below
24676 that will pick code for medium-sized blocks. */
24677 if (alg != libcall)
24679 *noalign = alg_noalign;
24680 return alg;
24682 else if (!any_alg_usable_p)
24683 break;
24685 else if (alg_usable_p (candidate, memset))
24687 *noalign = algs->size[i].noalign;
24688 return candidate;
24693 /* When asked to inline the call anyway, try to pick a meaningful choice.
24694 We look for the maximal size of block that is faster to copy by hand and
24695 take blocks of at most that size, guessing that the average size will
24696 be roughly half of the maximum.
24698 If this turns out to be bad, we might simply specify the preferred
24699 choice in ix86_costs. */
24700 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24701 && (algs->unknown_size == libcall
24702 || !alg_usable_p (algs->unknown_size, memset)))
24704 enum stringop_alg alg;
24706 /* If there aren't any usable algorithms, then recursing on
24707 smaller sizes isn't going to find anything. Just return the
24708 simple byte-at-a-time copy loop. */
24709 if (!any_alg_usable_p)
24711 /* Pick something reasonable. */
24712 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24713 *dynamic_check = 128;
24714 return loop_1_byte;
24716 if (max <= 0)
24717 max = 4096;
24718 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24719 zero_memset, dynamic_check, noalign);
24720 gcc_assert (*dynamic_check == -1);
24721 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24722 *dynamic_check = max;
24723 else
24724 gcc_assert (alg != libcall);
24725 return alg;
24727 return (alg_usable_p (algs->unknown_size, memset)
24728 ? algs->unknown_size : libcall);
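/* Illustration of the forced-inline heuristic above: with
   -minline-stringops-dynamically and an unknown-size memcpy whose cost
   table tops out at max = 4096, decide_alg recurses with an expected size
   of 2048 to pick an inline algorithm and sets *dynamic_check to 4096, so
   the expansion emits a runtime size test that dispatches blocks of 4096
   bytes or more to the library call.  */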
24731 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24732 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24733 static int
24734 decide_alignment (int align,
24735 enum stringop_alg alg,
24736 int expected_size,
24737 machine_mode move_mode)
24739 int desired_align = 0;
24741 gcc_assert (alg != no_stringop);
24743 if (alg == libcall)
24744 return 0;
24745 if (move_mode == VOIDmode)
24746 return 0;
24748 desired_align = GET_MODE_SIZE (move_mode);
24749 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24750 copying a whole cache line at once. */
24751 if (TARGET_PENTIUMPRO
24752 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24753 desired_align = 8;
24755 if (optimize_size)
24756 desired_align = 1;
24757 if (desired_align < align)
24758 desired_align = align;
24759 if (expected_size != -1 && expected_size < 4)
24760 desired_align = align;
24762 return desired_align;
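/* For example, alg = unrolled_loop with move_mode = DImode asks for 8-byte
   alignment (GET_MODE_SIZE), rep_prefix_4_byte on TARGET_PENTIUMPRO is
   raised from 4 to 8 to hit the fast cache-line path, and under
   optimize_size the request drops to 1 unless the operand is already known
   to be aligned more strictly.  */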
24766 /* Helper function for memset. For QImode value 0xXY produce
24767 0xXYXYXYXY of the width specified by MODE. This is essentially
24768 a * 0x01010101, but we can do slightly better than
24769 synth_mult by unwinding the sequence by hand on CPUs with
24770 slow multiply. */
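/* E.g. promoting the constant 0xAB to SImode: v = 0xAB, v |= v << 8 gives
   0xABAB, and v |= v << 16 gives 0xABABABAB; for DImode one more doubling
   yields 0xABABABABABABABAB.  */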
24771 static rtx
24772 promote_duplicated_reg (machine_mode mode, rtx val)
24774 machine_mode valmode = GET_MODE (val);
24775 rtx tmp;
24776 int nops = mode == DImode ? 3 : 2;
24778 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24779 if (val == const0_rtx)
24780 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24781 if (CONST_INT_P (val))
24783 HOST_WIDE_INT v = INTVAL (val) & 255;
24785 v |= v << 8;
24786 v |= v << 16;
24787 if (mode == DImode)
24788 v |= (v << 16) << 16;
24789 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24792 if (valmode == VOIDmode)
24793 valmode = QImode;
24794 if (valmode != QImode)
24795 val = gen_lowpart (QImode, val);
24796 if (mode == QImode)
24797 return val;
24798 if (!TARGET_PARTIAL_REG_STALL)
24799 nops--;
24800 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24801 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24802 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24803 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24805 rtx reg = convert_modes (mode, QImode, val, true);
24806 tmp = promote_duplicated_reg (mode, const1_rtx);
24807 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24808 OPTAB_DIRECT);
24810 else
24812 rtx reg = convert_modes (mode, QImode, val, true);
24814 if (!TARGET_PARTIAL_REG_STALL)
24815 if (mode == SImode)
24816 emit_insn (gen_insvsi_1 (reg, reg));
24817 else
24818 emit_insn (gen_insvdi_1 (reg, reg));
24819 else
24821 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24822 NULL, 1, OPTAB_DIRECT);
24823 reg =
24824 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24826 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24827 NULL, 1, OPTAB_DIRECT);
24828 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24829 if (mode == SImode)
24830 return reg;
24831 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24832 NULL, 1, OPTAB_DIRECT);
24833 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24834 return reg;
24838 /* Duplicate value VAL using promote_duplicated_reg into the maximal size
24839 that will be needed by the main loop copying SIZE_NEEDED chunks and by
24840 the prologue raising alignment from ALIGN to DESIRED_ALIGN. */
24841 static rtx
24842 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24843 int align)
24845 rtx promoted_val;
24847 if (TARGET_64BIT
24848 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24849 promoted_val = promote_duplicated_reg (DImode, val);
24850 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24851 promoted_val = promote_duplicated_reg (SImode, val);
24852 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24853 promoted_val = promote_duplicated_reg (HImode, val);
24854 else
24855 promoted_val = val;
24857 return promoted_val;
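/* For instance, a 64-bit memset expanded with size_needed = 8 promotes VAL
   to DImode, while a case needing only 2 bytes and no extra alignment gets
   by with the HImode duplicate.  */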
24860 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24861 operations when profitable. The code depends upon architecture, block size
24862 and alignment, but always has one of the following overall structures:
24864 Aligned move sequence:
24866 1) Prologue guard: Conditional that jumps up to epilogues for small
24867 blocks that can be handled by the epilogue alone. This is faster
24868 but also needed for correctness, since the prologue assumes the block
24869 is larger than the desired alignment.
24871 Optional dynamic check for size and libcall for large
24872 blocks is emitted here too, with -minline-stringops-dynamically.
24874 2) Prologue: copy the first few bytes in order to get the destination
24875 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24876 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24877 copied. We emit either a jump tree for power-of-two sized
24878 blocks, or a byte loop.
24880 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24881 with specified algorithm.
24883 4) Epilogue: code copying the tail of the block that is too small to be
24884 handled by the main body (or up to the size guarded by the prologue guard).
24886 Misaligned move sequence
24888 1) Misaligned move prologue/epilogue containing:
24889 a) Prologue handling small memory blocks and jumping to done_label
24890 (skipped if blocks are known to be large enough)
24891 b) Single move copying the first DESIRED_ALIGN - ALIGN bytes, if
24892 alignment is needed, using a possibly misaligned move
24893 (skipped if alignment is not needed)
24894 c) Copy of the last SIZE_NEEDED bytes by possibly misaligned moves
24896 2) Zero size guard dispatching to done_label, if needed
24898 3) Dispatch to library call, if needed
24900 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24901 with the specified algorithm. */
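/* A concrete instance of the aligned sequence: a memcpy with unknown count,
   align = 1, desired_align = 8 and size_needed = 16 first jumps to the
   epilogue when the count is below 16, then copies up to 7 bytes to align
   the destination, runs the main loop on 16-byte chunks, and finally lets
   the epilogue move the remaining count & 15 bytes.  */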
24902 bool
24903 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24904 rtx align_exp, rtx expected_align_exp,
24905 rtx expected_size_exp, rtx min_size_exp,
24906 rtx max_size_exp, rtx probable_max_size_exp,
24907 bool issetmem)
24909 rtx destreg;
24910 rtx srcreg = NULL;
24911 rtx_code_label *label = NULL;
24912 rtx tmp;
24913 rtx_code_label *jump_around_label = NULL;
24914 HOST_WIDE_INT align = 1;
24915 unsigned HOST_WIDE_INT count = 0;
24916 HOST_WIDE_INT expected_size = -1;
24917 int size_needed = 0, epilogue_size_needed;
24918 int desired_align = 0, align_bytes = 0;
24919 enum stringop_alg alg;
24920 rtx promoted_val = NULL;
24921 rtx vec_promoted_val = NULL;
24922 bool force_loopy_epilogue = false;
24923 int dynamic_check;
24924 bool need_zero_guard = false;
24925 bool noalign;
24926 machine_mode move_mode = VOIDmode;
24927 int unroll_factor = 1;
24928 /* TODO: Once value ranges are available, fill in proper data. */
24929 unsigned HOST_WIDE_INT min_size = 0;
24930 unsigned HOST_WIDE_INT max_size = -1;
24931 unsigned HOST_WIDE_INT probable_max_size = -1;
24932 bool misaligned_prologue_used = false;
24934 if (CONST_INT_P (align_exp))
24935 align = INTVAL (align_exp);
24936 /* i386 can do misaligned access at a reasonably increased cost. */
24937 if (CONST_INT_P (expected_align_exp)
24938 && INTVAL (expected_align_exp) > align)
24939 align = INTVAL (expected_align_exp);
24940 /* ALIGN is the minimum of destination and source alignment, but we care here
24941 just about destination alignment. */
24942 else if (!issetmem
24943 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24944 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24946 if (CONST_INT_P (count_exp))
24948 min_size = max_size = probable_max_size = count = expected_size
24949 = INTVAL (count_exp);
24950 /* When COUNT is 0, there is nothing to do. */
24951 if (!count)
24952 return true;
24954 else
24956 if (min_size_exp)
24957 min_size = INTVAL (min_size_exp);
24958 if (max_size_exp)
24959 max_size = INTVAL (max_size_exp);
24960 if (probable_max_size_exp)
24961 probable_max_size = INTVAL (probable_max_size_exp);
24962 if (CONST_INT_P (expected_size_exp))
24963 expected_size = INTVAL (expected_size_exp);
24966 /* Make sure we don't need to care about overflow later on. */
24967 if (count > (HOST_WIDE_INT_1U << 30))
24968 return false;
24970 /* Step 0: Decide on preferred algorithm, desired alignment and
24971 size of chunks to be copied by main loop. */
24972 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24973 issetmem,
24974 issetmem && val_exp == const0_rtx,
24975 &dynamic_check, &noalign);
24976 if (alg == libcall)
24977 return false;
24978 gcc_assert (alg != no_stringop);
24980 /* For now the vector version of memset is generated only for memory zeroing,
24981 as creating the promoted vector value is very cheap in this case. */
24982 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24983 alg = unrolled_loop;
24985 if (!count)
24986 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24987 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24988 if (!issetmem)
24989 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24991 unroll_factor = 1;
24992 move_mode = word_mode;
24993 switch (alg)
24995 case libcall:
24996 case no_stringop:
24997 case last_alg:
24998 gcc_unreachable ();
24999 case loop_1_byte:
25000 need_zero_guard = true;
25001 move_mode = QImode;
25002 break;
25003 case loop:
25004 need_zero_guard = true;
25005 break;
25006 case unrolled_loop:
25007 need_zero_guard = true;
25008 unroll_factor = (TARGET_64BIT ? 4 : 2);
25009 break;
25010 case vector_loop:
25011 need_zero_guard = true;
25012 unroll_factor = 4;
25013 /* Find the widest supported mode. */
25014 move_mode = word_mode;
25015 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
25016 != CODE_FOR_nothing)
25017 move_mode = GET_MODE_WIDER_MODE (move_mode);
25019 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25020 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25021 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25023 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25024 move_mode = mode_for_vector (word_mode, nunits);
25025 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
25026 move_mode = word_mode;
25028 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
25029 break;
25030 case rep_prefix_8_byte:
25031 move_mode = DImode;
25032 break;
25033 case rep_prefix_4_byte:
25034 move_mode = SImode;
25035 break;
25036 case rep_prefix_1_byte:
25037 move_mode = QImode;
25038 break;
25040 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
25041 epilogue_size_needed = size_needed;
25043 desired_align = decide_alignment (align, alg, expected_size, move_mode);
25044 if (!TARGET_ALIGN_STRINGOPS || noalign)
25045 align = desired_align;
25047 /* Step 1: Prologue guard. */
25049 /* Alignment code needs count to be in a register. */
25050 if (CONST_INT_P (count_exp) && desired_align > align)
25052 if (INTVAL (count_exp) > desired_align
25053 && INTVAL (count_exp) > size_needed)
25055 align_bytes
25056 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
25057 if (align_bytes <= 0)
25058 align_bytes = 0;
25059 else
25060 align_bytes = desired_align - align_bytes;
25062 if (align_bytes == 0)
25063 count_exp = force_reg (counter_mode (count_exp), count_exp);
25065 gcc_assert (desired_align >= 1 && align >= 1);
25067 /* Misaligned move sequences handle both prologue and epilogue at once.
25068 Default code generation results in smaller code for large alignments
25069 and also avoids redundant work when sizes are known precisely. */
25070 misaligned_prologue_used
25071 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
25072 && MAX (desired_align, epilogue_size_needed) <= 32
25073 && desired_align <= epilogue_size_needed
25074 && ((desired_align > align && !align_bytes)
25075 || (!count && epilogue_size_needed > 1)));
25077 /* Do the cheap promotion to allow better CSE across the
25078 main loop and epilogue (i.e. one load of the big constant in
25079 front of all the code).
25080 For now the misaligned move sequences do not have a fast path
25081 without broadcasting. */
25082 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25084 if (alg == vector_loop)
25086 gcc_assert (val_exp == const0_rtx);
25087 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25088 promoted_val = promote_duplicated_reg_to_size (val_exp,
25089 GET_MODE_SIZE (word_mode),
25090 desired_align, align);
25092 else
25094 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25095 desired_align, align);
25098 /* Misaligned move sequences handle both prologues and epilogues at once.
25099 Default code generation results in smaller code for large alignments and
25100 also avoids redundant work when sizes are known precisely. */
25101 if (misaligned_prologue_used)
25103 /* The misaligned move prologue handles small blocks by itself. */
25104 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25105 (dst, src, &destreg, &srcreg,
25106 move_mode, promoted_val, vec_promoted_val,
25107 &count_exp,
25108 &jump_around_label,
25109 desired_align < align
25110 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25111 desired_align, align, &min_size, dynamic_check, issetmem);
25112 if (!issetmem)
25113 src = change_address (src, BLKmode, srcreg);
25114 dst = change_address (dst, BLKmode, destreg);
25115 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25116 epilogue_size_needed = 0;
25117 if (need_zero_guard
25118 && min_size < (unsigned HOST_WIDE_INT) size_needed)
25120 /* It is possible that we copied enough that the main loop will not
25121 execute. */
25122 gcc_assert (size_needed > 1);
25123 if (jump_around_label == NULL_RTX)
25124 jump_around_label = gen_label_rtx ();
25125 emit_cmp_and_jump_insns (count_exp,
25126 GEN_INT (size_needed),
25127 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25128 if (expected_size == -1
25129 || expected_size < (desired_align - align) / 2 + size_needed)
25130 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25131 else
25132 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25135 /* Ensure that alignment prologue won't copy past end of block. */
25136 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25138 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25139 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
25140 Make sure it is a power of 2. */
25141 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25143 /* To improve performance of small blocks, we jump around the VAL
25144 promotion code. This means that if the promoted VAL is not constant,
25145 we might not use it in the epilogue and have to use the byte
25146 loop variant. */
25147 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25148 force_loopy_epilogue = true;
25149 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25150 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25152 /* If main algorithm works on QImode, no epilogue is needed.
25153 For small sizes just don't align anything. */
25154 if (size_needed == 1)
25155 desired_align = align;
25156 else
25157 goto epilogue;
25159 else if (!count
25160 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25162 label = gen_label_rtx ();
25163 emit_cmp_and_jump_insns (count_exp,
25164 GEN_INT (epilogue_size_needed),
25165 LTU, 0, counter_mode (count_exp), 1, label);
25166 if (expected_size == -1 || expected_size < epilogue_size_needed)
25167 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25168 else
25169 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25173 /* Emit code to decide at runtime whether a library call or inline code
25174 should be used. */
25175 if (dynamic_check != -1)
25177 if (!issetmem && CONST_INT_P (count_exp))
25179 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25181 emit_block_move_via_libcall (dst, src, count_exp, false);
25182 count_exp = const0_rtx;
25183 goto epilogue;
25186 else
25188 rtx_code_label *hot_label = gen_label_rtx ();
25189 if (jump_around_label == NULL_RTX)
25190 jump_around_label = gen_label_rtx ();
25191 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25192 LEU, 0, counter_mode (count_exp),
25193 1, hot_label);
25194 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25195 if (issetmem)
25196 set_storage_via_libcall (dst, count_exp, val_exp, false);
25197 else
25198 emit_block_move_via_libcall (dst, src, count_exp, false);
25199 emit_jump (jump_around_label);
25200 emit_label (hot_label);
25204 /* Step 2: Alignment prologue. */
25205 /* Do the expensive promotion once we have branched off the small blocks. */
25206 if (issetmem && !promoted_val)
25207 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25208 desired_align, align);
25210 if (desired_align > align && !misaligned_prologue_used)
25212 if (align_bytes == 0)
25214 /* Except for the first move in the prologue, we no longer know
25215 the constant offset in the aliasing info. It doesn't seem worth
25216 the pain to maintain it for the first move, so throw away
25217 the info early. */
25218 dst = change_address (dst, BLKmode, destreg);
25219 if (!issetmem)
25220 src = change_address (src, BLKmode, srcreg);
25221 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25222 promoted_val, vec_promoted_val,
25223 count_exp, align, desired_align,
25224 issetmem);
25225 /* At most desired_align - align bytes are copied. */
25226 if (min_size < (unsigned)(desired_align - align))
25227 min_size = 0;
25228 else
25229 min_size -= desired_align - align;
25231 else
25233 /* If we know how many bytes need to be stored before dst is
25234 sufficiently aligned, maintain aliasing info accurately. */
25235 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25236 srcreg,
25237 promoted_val,
25238 vec_promoted_val,
25239 desired_align,
25240 align_bytes,
25241 issetmem);
25243 count_exp = plus_constant (counter_mode (count_exp),
25244 count_exp, -align_bytes);
25245 count -= align_bytes;
25246 min_size -= align_bytes;
25247 max_size -= align_bytes;
25249 if (need_zero_guard
25250 && min_size < (unsigned HOST_WIDE_INT) size_needed
25251 && (count < (unsigned HOST_WIDE_INT) size_needed
25252 || (align_bytes == 0
25253 && count < ((unsigned HOST_WIDE_INT) size_needed
25254 + desired_align - align))))
25256 /* It is possible that we copied enough that the main loop will not
25257 execute. */
25258 gcc_assert (size_needed > 1);
25259 if (label == NULL_RTX)
25260 label = gen_label_rtx ();
25261 emit_cmp_and_jump_insns (count_exp,
25262 GEN_INT (size_needed),
25263 LTU, 0, counter_mode (count_exp), 1, label);
25264 if (expected_size == -1
25265 || expected_size < (desired_align - align) / 2 + size_needed)
25266 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25267 else
25268 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25271 if (label && size_needed == 1)
25273 emit_label (label);
25274 LABEL_NUSES (label) = 1;
25275 label = NULL;
25276 epilogue_size_needed = 1;
25277 if (issetmem)
25278 promoted_val = val_exp;
25280 else if (label == NULL_RTX && !misaligned_prologue_used)
25281 epilogue_size_needed = size_needed;
25283 /* Step 3: Main loop. */
25285 switch (alg)
25287 case libcall:
25288 case no_stringop:
25289 case last_alg:
25290 gcc_unreachable ();
25291 case loop_1_byte:
25292 case loop:
25293 case unrolled_loop:
25294 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25295 count_exp, move_mode, unroll_factor,
25296 expected_size, issetmem);
25297 break;
25298 case vector_loop:
25299 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25300 vec_promoted_val, count_exp, move_mode,
25301 unroll_factor, expected_size, issetmem);
25302 break;
25303 case rep_prefix_8_byte:
25304 case rep_prefix_4_byte:
25305 case rep_prefix_1_byte:
25306 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25307 val_exp, count_exp, move_mode, issetmem);
25308 break;
25310 /* Properly adjust the offset of src and dest memory for aliasing. */
25311 if (CONST_INT_P (count_exp))
25313 if (!issetmem)
25314 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25315 (count / size_needed) * size_needed);
25316 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25317 (count / size_needed) * size_needed);
25319 else
25321 if (!issetmem)
25322 src = change_address (src, BLKmode, srcreg);
25323 dst = change_address (dst, BLKmode, destreg);
25326 /* Step 4: Epilogue to copy the remaining bytes. */
25327 epilogue:
25328 if (label)
25330 /* When the main loop is done, COUNT_EXP might hold the original count,
25331 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25332 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25333 bytes. Compensate if needed. */
25335 if (size_needed < epilogue_size_needed)
25337 tmp =
25338 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25339 GEN_INT (size_needed - 1), count_exp, 1,
25340 OPTAB_DIRECT);
25341 if (tmp != count_exp)
25342 emit_move_insn (count_exp, tmp);
25344 emit_label (label);
25345 LABEL_NUSES (label) = 1;
25348 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25350 if (force_loopy_epilogue)
25351 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25352 epilogue_size_needed);
25353 else
25355 if (issetmem)
25356 expand_setmem_epilogue (dst, destreg, promoted_val,
25357 vec_promoted_val, count_exp,
25358 epilogue_size_needed);
25359 else
25360 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25361 epilogue_size_needed);
25364 if (jump_around_label)
25365 emit_label (jump_around_label);
25366 return true;
25370 /* Expand the appropriate insns for doing strlen if not just doing
25371 repnz; scasb
25373 out = result, initialized with the start address
25374 align_rtx = alignment of the address.
25375 scratch = scratch register, initialized with the start address when
25376 not aligned, otherwise undefined
25378 This is just the body. It needs the initializations mentioned above and
25379 some address computing at the end. These things are done in i386.md. */
25381 static void
25382 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25384 int align;
25385 rtx tmp;
25386 rtx_code_label *align_2_label = NULL;
25387 rtx_code_label *align_3_label = NULL;
25388 rtx_code_label *align_4_label = gen_label_rtx ();
25389 rtx_code_label *end_0_label = gen_label_rtx ();
25390 rtx mem;
25391 rtx tmpreg = gen_reg_rtx (SImode);
25392 rtx scratch = gen_reg_rtx (SImode);
25393 rtx cmp;
25395 align = 0;
25396 if (CONST_INT_P (align_rtx))
25397 align = INTVAL (align_rtx);
25399 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25401 /* Is there a known alignment and is it less than 4? */
25402 if (align < 4)
25404 rtx scratch1 = gen_reg_rtx (Pmode);
25405 emit_move_insn (scratch1, out);
25406 /* Is there a known alignment and is it not 2? */
25407 if (align != 2)
25409 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25410 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25412 /* Leave just the two lower bits. */
25413 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25414 NULL_RTX, 0, OPTAB_WIDEN);
25416 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25417 Pmode, 1, align_4_label);
25418 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25419 Pmode, 1, align_2_label);
25420 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25421 Pmode, 1, align_3_label);
25423 else
25425 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25426 check whether it is aligned to 4 bytes. */
25428 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25429 NULL_RTX, 0, OPTAB_WIDEN);
25431 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25432 Pmode, 1, align_4_label);
25435 mem = change_address (src, QImode, out);
25437 /* Now compare the bytes. */
25439 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25440 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25441 QImode, 1, end_0_label);
25443 /* Increment the address. */
25444 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25446 /* Not needed with an alignment of 2 */
25447 if (align != 2)
25449 emit_label (align_2_label);
25451 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25452 end_0_label);
25454 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25456 emit_label (align_3_label);
25459 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25460 end_0_label);
25462 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25465 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25466 align this loop: it only enlarges the program and does not
25467 speed it up. */
25468 emit_label (align_4_label);
25470 mem = change_address (src, SImode, out);
25471 emit_move_insn (scratch, mem);
25472 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25474 /* This formula yields a nonzero result iff one of the bytes is zero.
25475 This saves three branches inside the loop and many cycles. */
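/* The test computed below is (x - 0x01010101) & ~x & 0x80808080, which is
   nonzero exactly when some byte of x is zero.  Worked example for
   x = 0x41420043 (zero in byte 1): x - 0x01010101 = 0x4040ff42,
   ~x = 0xbebdffbc, their AND is 0x0000ff00, and masking with 0x80808080
   leaves 0x00008000, flagging the zero byte.  */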
25477 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25478 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25479 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25480 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25481 gen_int_mode (0x80808080, SImode)));
25482 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25483 align_4_label);
25485 if (TARGET_CMOVE)
25487 rtx reg = gen_reg_rtx (SImode);
25488 rtx reg2 = gen_reg_rtx (Pmode);
25489 emit_move_insn (reg, tmpreg);
25490 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25492 /* If zero is not in the first two bytes, move two bytes forward. */
25493 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25494 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25495 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25496 emit_insn (gen_rtx_SET (tmpreg,
25497 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25498 reg,
25499 tmpreg)));
25500 /* Emit lea manually to avoid clobbering of flags. */
25501 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
25503 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25504 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25505 emit_insn (gen_rtx_SET (out,
25506 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25507 reg2,
25508 out)));
25510 else
25512 rtx_code_label *end_2_label = gen_label_rtx ();
25513 /* Is zero in the first two bytes? */
25515 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25516 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25517 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25518 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25519 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25520 pc_rtx);
25521 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
25522 JUMP_LABEL (tmp) = end_2_label;
25524 /* Not in the first two. Move two bytes forward. */
25525 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25526 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25528 emit_label (end_2_label);
25532 /* Avoid a branch in fixing the byte. */
25533 tmpreg = gen_lowpart (QImode, tmpreg);
25534 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
25535 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25536 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25537 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
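/* The two insns above compute out -= 3 + carry, where the carry comes from
   doubling the low byte of the mask in tmpreg: if the zero byte is the
   first of the remaining pair, that byte holds 0x80, the doubling sets the
   carry and out steps back by 4, otherwise it steps back by 3; either way
   out lands exactly on the terminating zero.  */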
25539 emit_label (end_0_label);
25542 /* Expand strlen. */
25544 bool
25545 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25547 rtx addr, scratch1, scratch2, scratch3, scratch4;
25549 /* The generic case of the strlen expander is long. Avoid expanding
25550 it unless TARGET_INLINE_ALL_STRINGOPS. */
25552 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25553 && !TARGET_INLINE_ALL_STRINGOPS
25554 && !optimize_insn_for_size_p ()
25555 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25556 return false;
25558 addr = force_reg (Pmode, XEXP (src, 0));
25559 scratch1 = gen_reg_rtx (Pmode);
25561 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25562 && !optimize_insn_for_size_p ())
25564 /* Well, it seems that some optimizer does not combine a call like
25565 foo(strlen(bar), strlen(bar));
25566 when the move and the subtraction are done here. It does calculate
25567 the length just once when these instructions are done inside
25568 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25569 often used and I use one fewer register for the lifetime of
25570 output_strlen_unroll() this is better. */
25572 emit_move_insn (out, addr);
25574 ix86_expand_strlensi_unroll_1 (out, src, align);
25576 /* strlensi_unroll_1 returns the address of the zero at the end of
25577 the string, like memchr(), so compute the length by subtracting
25578 the start address. */
25579 emit_insn (ix86_gen_sub3 (out, out, addr));
25581 else
25583 rtx unspec;
25585 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25586 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25587 return false;
25589 scratch2 = gen_reg_rtx (Pmode);
25590 scratch3 = gen_reg_rtx (Pmode);
25591 scratch4 = force_reg (Pmode, constm1_rtx);
25593 emit_move_insn (scratch3, addr);
25594 eoschar = force_reg (QImode, eoschar);
25596 src = replace_equiv_address_nv (src, scratch3);
25598 /* If .md starts supporting :P, this can be done in .md. */
25599 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25600 scratch4), UNSPEC_SCAS);
25601 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25602 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25603 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25605 return true;
25608 /* For a given symbol (function), construct code to compute the address of its
25609 PLT entry in the large x86-64 PIC model. */
25610 static rtx
25611 construct_plt_address (rtx symbol)
25613 rtx tmp, unspec;
25615 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25616 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25617 gcc_assert (Pmode == DImode);
25619 tmp = gen_reg_rtx (Pmode);
25620 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25622 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25623 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25624 return tmp;
25628 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25629 rtx callarg2,
25630 rtx pop, bool sibcall)
25632 rtx vec[3];
25633 rtx use = NULL, call;
25634 unsigned int vec_len = 0;
25636 if (pop == const0_rtx)
25637 pop = NULL;
25638 gcc_assert (!TARGET_64BIT || !pop);
25640 if (TARGET_MACHO && !TARGET_64BIT)
25642 #if TARGET_MACHO
25643 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25644 fnaddr = machopic_indirect_call_target (fnaddr);
25645 #endif
25647 else
25649 /* Static functions and indirect calls don't need the PIC register. Also,
25650 check whether the PLT was explicitly avoided via -fno-plt or the "noplt"
25651 attribute, making it an indirect call. */
25652 if (flag_pic
25653 && (!TARGET_64BIT
25654 || (ix86_cmodel == CM_LARGE_PIC
25655 && DEFAULT_ABI != MS_ABI))
25656 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25657 && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
25658 && flag_plt
25659 && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
25660 || !lookup_attribute ("noplt",
25661 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
25663 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25664 if (ix86_use_pseudo_pic_reg ())
25665 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25666 pic_offset_table_rtx);
25670 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25671 parameters passed in vector registers. */
25672 if (TARGET_64BIT
25673 && (INTVAL (callarg2) > 0
25674 || (INTVAL (callarg2) == 0
25675 && (TARGET_SSE || !flag_skip_rax_setup))))
25677 rtx al = gen_rtx_REG (QImode, AX_REG);
25678 emit_move_insn (al, callarg2);
25679 use_reg (&use, al);
25682 if (ix86_cmodel == CM_LARGE_PIC
25683 && !TARGET_PECOFF
25684 && MEM_P (fnaddr)
25685 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25686 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25687 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25688 else if (sibcall
25689 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25690 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25692 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25693 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25696 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25698 if (retval)
25700 /* We should add bounds as a destination register in case
25701 a pointer with bounds may be returned. */
25702 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25704 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25705 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25706 if (GET_CODE (retval) == PARALLEL)
25708 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25709 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25710 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25711 retval = chkp_join_splitted_slot (retval, par);
25713 else
25715 retval = gen_rtx_PARALLEL (VOIDmode,
25716 gen_rtvec (3, retval, b0, b1));
25717 chkp_put_regs_to_expr_list (retval);
25721 call = gen_rtx_SET (retval, call);
25723 vec[vec_len++] = call;
25725 if (pop)
25727 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25728 pop = gen_rtx_SET (stack_pointer_rtx, pop);
25729 vec[vec_len++] = pop;
25732 if (TARGET_64BIT_MS_ABI
25733 && (!callarg2 || INTVAL (callarg2) != -2))
25735 int const cregs_size
25736 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25737 int i;
25739 for (i = 0; i < cregs_size; i++)
25741 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25742 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25744 clobber_reg (&use, gen_rtx_REG (mode, regno));
25748 if (vec_len > 1)
25749 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25750 call = emit_call_insn (call);
25751 if (use)
25752 CALL_INSN_FUNCTION_USAGE (call) = use;
25754 return call;
25757 /* Return true if the function being called was marked with attribute "noplt"
25758 or is called with -fno-plt and we are compiling for non-PIC x86_64. We need
25759 to handle the non-PIC case in the backend because there is no easy interface
25760 for the front end to force non-PLT calls to use the GOT. This is currently
25761 used only with 64-bit ELF targets to call functions marked "noplt"
25762 indirectly. */
25764 static bool
25765 ix86_nopic_noplt_attribute_p (rtx call_op)
25767 if (flag_pic || ix86_cmodel == CM_LARGE
25768 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
25769 || SYMBOL_REF_LOCAL_P (call_op))
25770 return false;
25772 tree symbol_decl = SYMBOL_REF_DECL (call_op);
25774 if (!flag_plt
25775 || (symbol_decl != NULL_TREE
25776 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
25777 return true;
25779 return false;
25782 /* Output the assembly for a call instruction. */
25784 const char *
25785 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25787 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25788 bool seh_nop_p = false;
25789 const char *xasm;
25791 if (SIBLING_CALL_P (insn))
25793 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25794 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
25795 else if (direct_p)
25796 xasm = "%!jmp\t%P0";
25797 /* SEH epilogue detection requires the indirect branch case
25798 to include REX.W. */
25799 else if (TARGET_SEH)
25800 xasm = "%!rex.W jmp %A0";
25801 else
25802 xasm = "%!jmp\t%A0";
25804 output_asm_insn (xasm, &call_op);
25805 return "";
25808 /* SEH unwinding can require an extra nop to be emitted in several
25809 circumstances. Determine if we have one of those. */
25810 if (TARGET_SEH)
25812 rtx_insn *i;
25814 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25816 /* If we get to another real insn, we don't need the nop. */
25817 if (INSN_P (i))
25818 break;
25820 /* If we get to the epilogue note, prevent a catch region from
25821 being adjacent to the standard epilogue sequence. If non-
25822 call-exceptions, we'll have done this during epilogue emission. */
25823 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25824 && !flag_non_call_exceptions
25825 && !can_throw_internal (insn))
25827 seh_nop_p = true;
25828 break;
25832 /* If we didn't find a real insn following the call, prevent the
25833 unwinder from looking into the next function. */
25834 if (i == NULL)
25835 seh_nop_p = true;
25838 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25839 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
25840 else if (direct_p)
25841 xasm = "%!call\t%P0";
25842 else
25843 xasm = "%!call\t%A0";
25845 output_asm_insn (xasm, &call_op);
25847 if (seh_nop_p)
25848 return "nop";
25850 return "";
25853 /* Clear stack slot assignments remembered from previous functions.
25854 This is called from INIT_EXPANDERS once before RTL is emitted for each
25855 function. */
25857 static struct machine_function *
25858 ix86_init_machine_status (void)
25860 struct machine_function *f;
25862 f = ggc_cleared_alloc<machine_function> ();
25863 f->use_fast_prologue_epilogue_nregs = -1;
25864 f->call_abi = ix86_abi;
25866 return f;
25869 /* Return a MEM corresponding to a stack slot with mode MODE.
25870 Allocate a new slot if necessary.
25872 The RTL for a function can have several slots available: N is
25873 which slot to use. */
25876 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25878 struct stack_local_entry *s;
25880 gcc_assert (n < MAX_386_STACK_LOCALS);
25882 for (s = ix86_stack_locals; s; s = s->next)
25883 if (s->mode == mode && s->n == n)
25884 return validize_mem (copy_rtx (s->rtl));
25886 s = ggc_alloc<stack_local_entry> ();
25887 s->n = n;
25888 s->mode = mode;
25889 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25891 s->next = ix86_stack_locals;
25892 ix86_stack_locals = s;
25893 return validize_mem (copy_rtx (s->rtl));
25896 static void
25897 ix86_instantiate_decls (void)
25899 struct stack_local_entry *s;
25901 for (s = ix86_stack_locals; s; s = s->next)
25902 if (s->rtl != NULL_RTX)
25903 instantiate_decl_rtl (s->rtl);
25906 /* Check whether x86 address PARTS is a pc-relative address. */
25908 static bool
25909 rip_relative_addr_p (struct ix86_address *parts)
25911 rtx base, index, disp;
25913 base = parts->base;
25914 index = parts->index;
25915 disp = parts->disp;
25917 if (disp && !base && !index)
25919 if (TARGET_64BIT)
25921 rtx symbol = disp;
25923 if (GET_CODE (disp) == CONST)
25924 symbol = XEXP (disp, 0);
25925 if (GET_CODE (symbol) == PLUS
25926 && CONST_INT_P (XEXP (symbol, 1)))
25927 symbol = XEXP (symbol, 0);
25929 if (GET_CODE (symbol) == LABEL_REF
25930 || (GET_CODE (symbol) == SYMBOL_REF
25931 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25932 || (GET_CODE (symbol) == UNSPEC
25933 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25934 || XINT (symbol, 1) == UNSPEC_PCREL
25935 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25936 return true;
25939 return false;
25942 /* Calculate the length of the memory address in the instruction encoding.
25943 This includes the addr32 prefix but not the one-byte modrm, opcode,
25944 or other prefixes. We never generate the addr32 prefix for LEA insns. */
25947 memory_address_length (rtx addr, bool lea)
25949 struct ix86_address parts;
25950 rtx base, index, disp;
25951 int len;
25952 int ok;
25954 if (GET_CODE (addr) == PRE_DEC
25955 || GET_CODE (addr) == POST_INC
25956 || GET_CODE (addr) == PRE_MODIFY
25957 || GET_CODE (addr) == POST_MODIFY)
25958 return 0;
25960 ok = ix86_decompose_address (addr, &parts);
25961 gcc_assert (ok);
25963 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25965 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25966 if (TARGET_64BIT && !lea
25967 && (SImode_address_operand (addr, VOIDmode)
25968 || (parts.base && GET_MODE (parts.base) == SImode)
25969 || (parts.index && GET_MODE (parts.index) == SImode)))
25970 len++;
25972 base = parts.base;
25973 index = parts.index;
25974 disp = parts.disp;
25976 if (base && SUBREG_P (base))
25977 base = SUBREG_REG (base);
25978 if (index && SUBREG_P (index))
25979 index = SUBREG_REG (index);
25981 gcc_assert (base == NULL_RTX || REG_P (base));
25982 gcc_assert (index == NULL_RTX || REG_P (index));
25984 /* Rule of thumb:
25985 - esp as the base always wants an index,
25986 - ebp as the base always wants a displacement,
25987 - r12 as the base always wants an index,
25988 - r13 as the base always wants a displacement. */
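/* Examples of these rules: (%ebp) cannot use the short modrm form and is
   encoded as disp8(%ebp), costing one extra byte; (%esp) needs a SIB byte
   for the same count; plain 4(%ebx) gets by with modrm plus a disp8.  */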
25990 /* Register Indirect. */
25991 if (base && !index && !disp)
25993 /* esp (for its index) and ebp (for its displacement) need
25994 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25995 code. */
25996 if (base == arg_pointer_rtx
25997 || base == frame_pointer_rtx
25998 || REGNO (base) == SP_REG
25999 || REGNO (base) == BP_REG
26000 || REGNO (base) == R12_REG
26001 || REGNO (base) == R13_REG)
26002 len++;
26005 /* Direct Addressing. In 64-bit mode, mod 00 r/m 5
26006 is not disp32 but disp32(%rip), so for plain disp32 a
26007 SIB byte is needed, unless print_operand_address
26008 optimizes it into disp32(%rip) or (%rip) is implied
26009 by an UNSPEC. */
26010 else if (disp && !base && !index)
26012 len += 4;
26013 if (rip_relative_addr_p (&parts))
26014 len++;
26016 else
26018 /* Find the length of the displacement constant. */
26019 if (disp)
26021 if (base && satisfies_constraint_K (disp))
26022 len += 1;
26023 else
26024 len += 4;
26026 /* ebp always wants a displacement. Similarly r13. */
26027 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
26028 len++;
26030 /* An index requires the two-byte modrm form.... */
26031 if (index
26032 /* ...like esp (or r12), which always wants an index. */
26033 || base == arg_pointer_rtx
26034 || base == frame_pointer_rtx
26035 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
26036 len++;
26039 return len;
26042 /* Compute the default value for the "length_immediate" attribute. When
26043 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
26045 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
26047 int len = 0;
26048 int i;
26049 extract_insn_cached (insn);
26050 for (i = recog_data.n_operands - 1; i >= 0; --i)
26051 if (CONSTANT_P (recog_data.operand[i]))
26053 enum attr_mode mode = get_attr_mode (insn);
26055 gcc_assert (!len);
26056 if (shortform && CONST_INT_P (recog_data.operand[i]))
26058 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
26059 switch (mode)
26061 case MODE_QI:
26062 len = 1;
26063 continue;
26064 case MODE_HI:
26065 ival = trunc_int_for_mode (ival, HImode);
26066 break;
26067 case MODE_SI:
26068 ival = trunc_int_for_mode (ival, SImode);
26069 break;
26070 default:
26071 break;
26073 if (IN_RANGE (ival, -128, 127))
26075 len = 1;
26076 continue;
26079 switch (mode)
26081 case MODE_QI:
26082 len = 1;
26083 break;
26084 case MODE_HI:
26085 len = 2;
26086 break;
26087 case MODE_SI:
26088 len = 4;
26089 break;
26090 /* Immediates for DImode instructions are encoded
26091 as 32-bit sign-extended values. */
26092 case MODE_DI:
26093 len = 4;
26094 break;
26095 default:
26096 fatal_insn ("unknown insn mode", insn);
26099 return len;
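/* E.g. for "add $100, %eax", which has a short-form alternative, the
   immediate fits in [-128, 127] and is counted as 1 byte, while
   "add $1000, %eax" needs the full 4-byte SImode immediate.  */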
26102 /* Compute the default value for the "length_address" attribute. */
26104 ix86_attr_length_address_default (rtx_insn *insn)
26106 int i;
26108 if (get_attr_type (insn) == TYPE_LEA)
26110 rtx set = PATTERN (insn), addr;
26112 if (GET_CODE (set) == PARALLEL)
26113 set = XVECEXP (set, 0, 0);
26115 gcc_assert (GET_CODE (set) == SET);
26117 addr = SET_SRC (set);
26119 return memory_address_length (addr, true);
26122 extract_insn_cached (insn);
26123 for (i = recog_data.n_operands - 1; i >= 0; --i)
26124 if (MEM_P (recog_data.operand[i]))
26126 constrain_operands_cached (insn, reload_completed);
26127 if (which_alternative != -1)
26129 const char *constraints = recog_data.constraints[i];
26130 int alt = which_alternative;
26132 while (*constraints == '=' || *constraints == '+')
26133 constraints++;
26134 while (alt-- > 0)
26135 while (*constraints++ != ',')
26137 /* Skip ignored operands. */
26138 if (*constraints == 'X')
26139 continue;
26141 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26143 return 0;
26146 /* Compute the default value for the "length_vex" attribute. It includes
26147 the 2- or 3-byte VEX prefix and 1 opcode byte. */
26150 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26151 bool has_vex_w)
26153 int i;
26155 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
26156 requires the 3-byte VEX prefix. */
26157 if (!has_0f_opcode || has_vex_w)
26158 return 3 + 1;
26160 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
26161 if (!TARGET_64BIT)
26162 return 2 + 1;
26164 extract_insn_cached (insn);
26166 for (i = recog_data.n_operands - 1; i >= 0; --i)
26167 if (REG_P (recog_data.operand[i]))
26169 /* REX.W bit uses 3 byte VEX prefix. */
26170 if (GET_MODE (recog_data.operand[i]) == DImode
26171 && GENERAL_REG_P (recog_data.operand[i]))
26172 return 3 + 1;
26174 else
26176 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26177 if (MEM_P (recog_data.operand[i])
26178 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26179 return 3 + 1;
26182 return 2 + 1;
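/* E.g. a 128-bit "vpaddd %xmm1, %xmm2, %xmm3" (an 0f opcode with no VEX.W,
   no extended registers in a memory operand and no DImode general
   registers) is counted as the 2-byte VEX prefix plus opcode, 3 bytes in
   total; addressing memory through %r8 or using a DImode general register
   pushes it to the 3-byte VEX form, 4 bytes in total.  */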
26185 /* Return the maximum number of instructions a CPU can issue. */
26187 static int
26188 ix86_issue_rate (void)
26190 switch (ix86_tune)
26192 case PROCESSOR_PENTIUM:
26193 case PROCESSOR_IAMCU:
26194 case PROCESSOR_BONNELL:
26195 case PROCESSOR_SILVERMONT:
26196 case PROCESSOR_KNL:
26197 case PROCESSOR_INTEL:
26198 case PROCESSOR_K6:
26199 case PROCESSOR_BTVER2:
26200 case PROCESSOR_PENTIUM4:
26201 case PROCESSOR_NOCONA:
26202 return 2;
26204 case PROCESSOR_PENTIUMPRO:
26205 case PROCESSOR_ATHLON:
26206 case PROCESSOR_K8:
26207 case PROCESSOR_AMDFAM10:
26208 case PROCESSOR_GENERIC:
26209 case PROCESSOR_BTVER1:
26210 return 3;
26212 case PROCESSOR_BDVER1:
26213 case PROCESSOR_BDVER2:
26214 case PROCESSOR_BDVER3:
26215 case PROCESSOR_BDVER4:
26216 case PROCESSOR_CORE2:
26217 case PROCESSOR_NEHALEM:
26218 case PROCESSOR_SANDYBRIDGE:
26219 case PROCESSOR_HASWELL:
26220 return 4;
26222 default:
26223 return 1;
26227 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26228 by DEP_INSN and nothing else set by DEP_INSN. */
26230 static bool
26231 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26233 rtx set, set2;
26235 /* Simplify the test for uninteresting insns. */
26236 if (insn_type != TYPE_SETCC
26237 && insn_type != TYPE_ICMOV
26238 && insn_type != TYPE_FCMOV
26239 && insn_type != TYPE_IBR)
26240 return false;
26242 if ((set = single_set (dep_insn)) != 0)
26244 set = SET_DEST (set);
26245 set2 = NULL_RTX;
26247 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26248 && XVECLEN (PATTERN (dep_insn), 0) == 2
26249 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26250 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26252 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26253 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26255 else
26256 return false;
26258 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26259 return false;
26261 /* This test is true if the dependent insn reads the flags but
26262 not any other potentially set register. */
26263 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26264 return false;
26266 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26267 return false;
26269 return true;
26272 /* Return true iff USE_INSN has a memory address with operands set by
26273 SET_INSN. */
26275 bool
26276 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26278 int i;
26279 extract_insn_cached (use_insn);
26280 for (i = recog_data.n_operands - 1; i >= 0; --i)
26281 if (MEM_P (recog_data.operand[i]))
26283 rtx addr = XEXP (recog_data.operand[i], 0);
26284 return modified_in_p (addr, set_insn) != 0;
26286 return false;
26289 /* Helper function for exact_store_load_dependency.
26290 Return true if addr is found in insn. */
26291 static bool
26292 exact_dependency_1 (rtx addr, rtx insn)
26294 enum rtx_code code;
26295 const char *format_ptr;
26296 int i, j;
26298 code = GET_CODE (insn);
26299 switch (code)
26301 case MEM:
26302 if (rtx_equal_p (addr, insn))
26303 return true;
26304 break;
26305 case REG:
26306 CASE_CONST_ANY:
26307 case SYMBOL_REF:
26308 case CODE_LABEL:
26309 case PC:
26310 case CC0:
26311 case EXPR_LIST:
26312 return false;
26313 default:
26314 break;
26317 format_ptr = GET_RTX_FORMAT (code);
26318 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26320 switch (*format_ptr++)
26322 case 'e':
26323 if (exact_dependency_1 (addr, XEXP (insn, i)))
26324 return true;
26325 break;
26326 case 'E':
26327 for (j = 0; j < XVECLEN (insn, i); j++)
26328 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26329 return true;
26330 break;
26333 return false;
26336 /* Return true if there exists an exact dependency between store & load, i.e.
26337 the same memory address is used by both. */
26338 static bool
26339 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26341 rtx set1, set2;
26343 set1 = single_set (store);
26344 if (!set1)
26345 return false;
26346 if (!MEM_P (SET_DEST (set1)))
26347 return false;
26348 set2 = single_set (load);
26349 if (!set2)
26350 return false;
26351 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26352 return true;
26353 return false;
26356 static int
26357 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26359 enum attr_type insn_type, dep_insn_type;
26360 enum attr_memory memory;
26361 rtx set, set2;
26362 int dep_insn_code_number;
26364 /* Anti and output dependencies have zero cost on all CPUs. */
26365 if (REG_NOTE_KIND (link) != 0)
26366 return 0;
26368 dep_insn_code_number = recog_memoized (dep_insn);
26370 /* If we can't recognize the insns, we can't really do anything. */
26371 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26372 return cost;
26374 insn_type = get_attr_type (insn);
26375 dep_insn_type = get_attr_type (dep_insn);
26377 switch (ix86_tune)
26379 case PROCESSOR_PENTIUM:
26380 case PROCESSOR_IAMCU:
26381 /* Address Generation Interlock adds a cycle of latency. */
26382 if (insn_type == TYPE_LEA)
26384 rtx addr = PATTERN (insn);
26386 if (GET_CODE (addr) == PARALLEL)
26387 addr = XVECEXP (addr, 0, 0);
26389 gcc_assert (GET_CODE (addr) == SET);
26391 addr = SET_SRC (addr);
26392 if (modified_in_p (addr, dep_insn))
26393 cost += 1;
26395 else if (ix86_agi_dependent (dep_insn, insn))
26396 cost += 1;
26398 /* ??? Compares pair with jump/setcc. */
26399 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26400 cost = 0;
26402 /* Floating point stores require the value to be ready one cycle earlier. */
26403 if (insn_type == TYPE_FMOV
26404 && get_attr_memory (insn) == MEMORY_STORE
26405 && !ix86_agi_dependent (dep_insn, insn))
26406 cost += 1;
26407 break;
26409 case PROCESSOR_PENTIUMPRO:
26410 /* INT->FP conversion is expensive. */
26411 if (get_attr_fp_int_src (dep_insn))
26412 cost += 5;
26414 /* There is one extra cycle of latency between an FP op and a store. */
26415 if (insn_type == TYPE_FMOV
26416 && (set = single_set (dep_insn)) != NULL_RTX
26417 && (set2 = single_set (insn)) != NULL_RTX
26418 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26419 && MEM_P (SET_DEST (set2)))
26420 cost += 1;
26422 memory = get_attr_memory (insn);
26424 /* Model the ability of the reorder buffer to hide the latency of a load
26425 by executing it in parallel with the previous instruction, when the
26426 previous instruction is not needed to compute the address. */
26427 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26428 && !ix86_agi_dependent (dep_insn, insn))
26430 /* Claim that moves take one cycle, as the core can issue one load
26431 at a time and the next load can start a cycle later. */
26432 if (dep_insn_type == TYPE_IMOV
26433 || dep_insn_type == TYPE_FMOV)
26434 cost = 1;
26435 else if (cost > 1)
26436 cost--;
26438 break;
26440 case PROCESSOR_K6:
26441 /* The esp dependency is resolved before
26442 the instruction is really finished. */
26443 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26444 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26445 return 1;
26447 /* INT->FP conversion is expensive. */
26448 if (get_attr_fp_int_src (dep_insn))
26449 cost += 5;
26451 memory = get_attr_memory (insn);
26453 /* Model the ability of the reorder buffer to hide the latency of a load
26454 by executing it in parallel with the previous instruction, when the
26455 previous instruction is not needed to compute the address. */
26456 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26457 && !ix86_agi_dependent (dep_insn, insn))
26459 /* Claim that moves take one cycle, as the core can issue one load
26460 at a time and the next load can start a cycle later. */
26461 if (dep_insn_type == TYPE_IMOV
26462 || dep_insn_type == TYPE_FMOV)
26463 cost = 1;
26464 else if (cost > 2)
26465 cost -= 2;
26466 else
26467 cost = 1;
26469 break;
26471 case PROCESSOR_AMDFAM10:
26472 case PROCESSOR_BDVER1:
26473 case PROCESSOR_BDVER2:
26474 case PROCESSOR_BDVER3:
26475 case PROCESSOR_BDVER4:
26476 case PROCESSOR_BTVER1:
26477 case PROCESSOR_BTVER2:
26478 case PROCESSOR_GENERIC:
26479 /* The stack engine allows push&pop instructions to execute in parallel. */
26480 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26481 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26482 return 0;
26483 /* FALLTHRU */
26485 case PROCESSOR_ATHLON:
26486 case PROCESSOR_K8:
26487 memory = get_attr_memory (insn);
26489 /* Model the ability of the reorder buffer to hide the latency of a load
26490 by executing it in parallel with the previous instruction, when the
26491 previous instruction is not needed to compute the address. */
26492 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26493 && !ix86_agi_dependent (dep_insn, insn))
26495 enum attr_unit unit = get_attr_unit (insn);
26496 int loadcost = 3;
26498 /* Because of the difference between the length of integer and
26499 floating unit pipeline preparation stages, the memory operands
26500 for floating point are cheaper.
26502 ??? For Athlon the difference is most probably 2. */
26503 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26504 loadcost = 3;
26505 else
26506 loadcost = TARGET_ATHLON ? 2 : 0;
26508 if (cost >= loadcost)
26509 cost -= loadcost;
26510 else
26511 cost = 0;
26513 break;
26515 case PROCESSOR_CORE2:
26516 case PROCESSOR_NEHALEM:
26517 case PROCESSOR_SANDYBRIDGE:
26518 case PROCESSOR_HASWELL:
26519 /* The stack engine allows push&pop instructions to execute in parallel. */
26520 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26521 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26522 return 0;
26524 memory = get_attr_memory (insn);
26526 /* Model the ability of the reorder buffer to hide the latency of a load
26527 by executing it in parallel with the previous instruction, when the
26528 previous instruction is not needed to compute the address. */
26529 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26530 && !ix86_agi_dependent (dep_insn, insn))
26532 if (cost >= 4)
26533 cost -= 4;
26534 else
26535 cost = 0;
26537 break;
26539 case PROCESSOR_SILVERMONT:
26540 case PROCESSOR_KNL:
26541 case PROCESSOR_INTEL:
26542 if (!reload_completed)
26543 return cost;
26545 /* Increase cost of integer loads. */
26546 memory = get_attr_memory (dep_insn);
26547 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26549 enum attr_unit unit = get_attr_unit (dep_insn);
26550 if (unit == UNIT_INTEGER && cost == 1)
26552 if (memory == MEMORY_LOAD)
26553 cost = 3;
26554 else
26556 /* Increase the cost of ld/st for short int types only,
26557 because of a store-forwarding issue. */
26558 rtx set = single_set (dep_insn);
26559 if (set && (GET_MODE (SET_DEST (set)) == QImode
26560 || GET_MODE (SET_DEST (set)) == HImode))
26562 /* Increase the cost of the store/load pair if an exact
26563 dependence exists and this is the load insn. */
26564 enum attr_memory insn_memory = get_attr_memory (insn);
26565 if (insn_memory == MEMORY_LOAD
26566 && exact_store_load_dependency (dep_insn, insn))
26567 cost = 3;
26573 default:
26574 break;
26577 return cost;
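/* A minimal standalone sketch (not part of this file) of the
   load-latency-hiding adjustment made above for the Core 2 class of
   processors: when the consumer is a load whose address does not depend
   on the producer (no AGI), the reorder buffer can overlap up to 4
   cycles of the dependence latency, clamped at zero.  */

static int
sketch_hide_load_latency (int cost, int agi_dependent)
{
  if (agi_dependent)
    return cost;                /* The address is needed first.  */
  return cost >= 4 ? cost - 4 : 0;
}

/* E.g. sketch_hide_load_latency (5, 0) == 1,
        sketch_hide_load_latency (3, 0) == 0,
        sketch_hide_load_latency (5, 1) == 5.  */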
26580 /* How many alternative schedules to try. This should be as wide as the
26581 scheduling freedom in the DFA, but no wider. Making this value too
26582 large results in extra work for the scheduler. */
26584 static int
26585 ia32_multipass_dfa_lookahead (void)
26587 switch (ix86_tune)
26589 case PROCESSOR_PENTIUM:
26590 case PROCESSOR_IAMCU:
26591 return 2;
26593 case PROCESSOR_PENTIUMPRO:
26594 case PROCESSOR_K6:
26595 return 1;
26597 case PROCESSOR_BDVER1:
26598 case PROCESSOR_BDVER2:
26599 case PROCESSOR_BDVER3:
26600 case PROCESSOR_BDVER4:
26601 /* We use a lookahead value of 4 for BD both before and after reload
26602 scheduling. The plan is to include a value of 8 for -O3. */
26603 return 4;
26605 case PROCESSOR_CORE2:
26606 case PROCESSOR_NEHALEM:
26607 case PROCESSOR_SANDYBRIDGE:
26608 case PROCESSOR_HASWELL:
26609 case PROCESSOR_BONNELL:
26610 case PROCESSOR_SILVERMONT:
26611 case PROCESSOR_KNL:
26612 case PROCESSOR_INTEL:
26613 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
26614 the number of instructions that can be executed in a cycle, i.e.,
26615 issue_rate. I wonder why tuning for many CPUs does not do this. */
26616 if (reload_completed)
26617 return ix86_issue_rate ();
26618 /* Don't use lookahead for pre-reload schedule to save compile time. */
26619 return 0;
26621 default:
26622 return 0;
26626 /* Return true if target platform supports macro-fusion. */
26628 static bool
26629 ix86_macro_fusion_p ()
26631 return TARGET_FUSE_CMP_AND_BRANCH;
26634 /* Check whether the current microarchitecture supports macro fusion
26635 for the insn pair "CONDGEN + CONDJMP". Refer to the
26636 "Intel Architectures Optimization Reference Manual". */
26638 static bool
26639 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26641 rtx src, dest;
26642 enum rtx_code ccode;
26643 rtx compare_set = NULL_RTX, test_if, cond;
26644 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26646 if (!any_condjump_p (condjmp))
26647 return false;
26649 if (get_attr_type (condgen) != TYPE_TEST
26650 && get_attr_type (condgen) != TYPE_ICMP
26651 && get_attr_type (condgen) != TYPE_INCDEC
26652 && get_attr_type (condgen) != TYPE_ALU)
26653 return false;
26655 compare_set = single_set (condgen);
26656 if (compare_set == NULL_RTX
26657 && !TARGET_FUSE_ALU_AND_BRANCH)
26658 return false;
26660 if (compare_set == NULL_RTX)
26662 int i;
26663 rtx pat = PATTERN (condgen);
26664 for (i = 0; i < XVECLEN (pat, 0); i++)
26665 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26667 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26668 if (GET_CODE (set_src) == COMPARE)
26669 compare_set = XVECEXP (pat, 0, i);
26670 else
26671 alu_set = XVECEXP (pat, 0, i);
26674 if (compare_set == NULL_RTX)
26675 return false;
26676 src = SET_SRC (compare_set);
26677 if (GET_CODE (src) != COMPARE)
26678 return false;
26680 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26681 supported. */
26682 if ((MEM_P (XEXP (src, 0))
26683 && CONST_INT_P (XEXP (src, 1)))
26684 || (MEM_P (XEXP (src, 1))
26685 && CONST_INT_P (XEXP (src, 0))))
26686 return false;
26688 /* No fusion for RIP-relative address. */
26689 if (MEM_P (XEXP (src, 0)))
26690 addr = XEXP (XEXP (src, 0), 0);
26691 else if (MEM_P (XEXP (src, 1)))
26692 addr = XEXP (XEXP (src, 1), 0);
26694 if (addr) {
26695 ix86_address parts;
26696 int ok = ix86_decompose_address (addr, &parts);
26697 gcc_assert (ok);
26699 if (rip_relative_addr_p (&parts))
26700 return false;
26703 test_if = SET_SRC (pc_set (condjmp));
26704 cond = XEXP (test_if, 0);
26705 ccode = GET_CODE (cond);
26706 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26707 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26708 && (ccode == GE
26709 || ccode == GT
26710 || ccode == LE
26711 || ccode == LT))
26712 return false;
26714 /* Return true for TYPE_TEST and TYPE_ICMP. */
26715 if (get_attr_type (condgen) == TYPE_TEST
26716 || get_attr_type (condgen) == TYPE_ICMP)
26717 return true;
26719 /* What follows handles the macro-fusion case for alu + jmp. */
26720 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26721 return false;
26723 /* No fusion for alu op with memory destination operand. */
26724 dest = SET_DEST (alu_set);
26725 if (MEM_P (dest))
26726 return false;
26728 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26729 supported. */
26730 if (get_attr_type (condgen) == TYPE_INCDEC
26731 && (ccode == GEU
26732 || ccode == GTU
26733 || ccode == LEU
26734 || ccode == LTU))
26735 return false;
26737 return true;
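/* A minimal standalone sketch (not part of this file) of the two
   condition-code filters applied above.  Signed conditions (GE, GT, LE,
   LT) read the sign and overflow flags, which gates fusion on
   TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS; unsigned conditions (GEU, GTU,
   LEU, LTU) read the carry flag, which inc/dec does not write, so
   inc/dec + unsigned jcc never fuses.  The sketch_ccode values are
   illustrative stand-ins for the rtx comparison codes.  */

enum sketch_ccode
{
  SKETCH_GE, SKETCH_GT, SKETCH_LE, SKETCH_LT,
  SKETCH_GEU, SKETCH_GTU, SKETCH_LEU, SKETCH_LTU,
  SKETCH_EQ, SKETCH_NE
};

/* Nonzero if the jump reads the sign or overflow flag.  */
static int
sketch_reads_soflags (enum sketch_ccode c)
{
  return c == SKETCH_GE || c == SKETCH_GT
	 || c == SKETCH_LE || c == SKETCH_LT;
}

/* Nonzero if the jump reads the carry flag, which INC/DEC leaves
   untouched.  */
static int
sketch_reads_carry (enum sketch_ccode c)
{
  return c == SKETCH_GEU || c == SKETCH_GTU
	 || c == SKETCH_LEU || c == SKETCH_LTU;
}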
26740 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26741 execution. It is applied if
26742 (1) an IMUL instruction is on top of the list;
26743 (2) the ready list contains exactly one producer of an independent
26744 IMUL instruction.
26745 Return the index of the IMUL producer if it was found, and -1 otherwise. */
26746 static int
26747 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26749 rtx_insn *insn;
26750 rtx set, insn1, insn2;
26751 sd_iterator_def sd_it;
26752 dep_t dep;
26753 int index = -1;
26754 int i;
26756 if (!TARGET_BONNELL)
26757 return index;
26759 /* Check that an IMUL instruction is on top of the ready list. */
26760 insn = ready[n_ready - 1];
26761 set = single_set (insn);
26762 if (!set)
26763 return index;
26764 if (!(GET_CODE (SET_SRC (set)) == MULT
26765 && GET_MODE (SET_SRC (set)) == SImode))
26766 return index;
26768 /* Search for the producer of an independent IMUL instruction. */
26769 for (i = n_ready - 2; i >= 0; i--)
26771 insn = ready[i];
26772 if (!NONDEBUG_INSN_P (insn))
26773 continue;
26774 /* Skip IMUL instruction. */
26775 insn2 = PATTERN (insn);
26776 if (GET_CODE (insn2) == PARALLEL)
26777 insn2 = XVECEXP (insn2, 0, 0);
26778 if (GET_CODE (insn2) == SET
26779 && GET_CODE (SET_SRC (insn2)) == MULT
26780 && GET_MODE (SET_SRC (insn2)) == SImode)
26781 continue;
26783 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26785 rtx con;
26786 con = DEP_CON (dep);
26787 if (!NONDEBUG_INSN_P (con))
26788 continue;
26789 insn1 = PATTERN (con);
26790 if (GET_CODE (insn1) == PARALLEL)
26791 insn1 = XVECEXP (insn1, 0, 0);
26793 if (GET_CODE (insn1) == SET
26794 && GET_CODE (SET_SRC (insn1)) == MULT
26795 && GET_MODE (SET_SRC (insn1)) == SImode)
26797 sd_iterator_def sd_it1;
26798 dep_t dep1;
26799 /* Check that there is no other dependee for the IMUL. */
26800 index = i;
26801 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26803 rtx pro;
26804 pro = DEP_PRO (dep1);
26805 if (!NONDEBUG_INSN_P (pro))
26806 continue;
26807 if (pro != insn)
26808 index = -1;
26810 if (index >= 0)
26811 break;
26814 if (index >= 0)
26815 break;
26817 return index;
26820 /* Try to find the best candidate for the top of the ready list if two
26821 insns have the same priority - the candidate is best if its dependees
26822 were scheduled earlier. Applied to Silvermont only.
26823 Return true if the top 2 insns must be interchanged. */
26824 static bool
26825 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26827 rtx_insn *top = ready[n_ready - 1];
26828 rtx_insn *next = ready[n_ready - 2];
26829 rtx set;
26830 sd_iterator_def sd_it;
26831 dep_t dep;
26832 int clock1 = -1;
26833 int clock2 = -1;
26834 #define INSN_TICK(INSN) (HID (INSN)->tick)
26836 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26837 return false;
26839 if (!NONDEBUG_INSN_P (top))
26840 return false;
26841 if (!NONJUMP_INSN_P (top))
26842 return false;
26843 if (!NONDEBUG_INSN_P (next))
26844 return false;
26845 if (!NONJUMP_INSN_P (next))
26846 return false;
26847 set = single_set (top);
26848 if (!set)
26849 return false;
26850 set = single_set (next);
26851 if (!set)
26852 return false;
26854 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26856 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26857 return false;
26858 /* Determine the winner more precisely. */
26859 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26861 rtx pro;
26862 pro = DEP_PRO (dep);
26863 if (!NONDEBUG_INSN_P (pro))
26864 continue;
26865 if (INSN_TICK (pro) > clock1)
26866 clock1 = INSN_TICK (pro);
26868 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26870 rtx pro;
26871 pro = DEP_PRO (dep);
26872 if (!NONDEBUG_INSN_P (pro))
26873 continue;
26874 if (INSN_TICK (pro) > clock2)
26875 clock2 = INSN_TICK (pro);
26878 if (clock1 == clock2)
26880 /* Determine the winner - a load must win. */
26881 enum attr_memory memory1, memory2;
26882 memory1 = get_attr_memory (top);
26883 memory2 = get_attr_memory (next);
26884 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26885 return true;
26887 return (bool) (clock2 < clock1);
26889 return false;
26890 #undef INSN_TICK
26893 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26894 Return the issue rate. */
26895 static int
26896 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26897 int *pn_ready, int clock_var)
26899 int issue_rate = -1;
26900 int n_ready = *pn_ready;
26901 int i;
26902 rtx_insn *insn;
26903 int index = -1;
26905 /* Set up issue rate. */
26906 issue_rate = ix86_issue_rate ();
26908 /* Do reordering for BONNELL/SILVERMONT only. */
26909 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26910 return issue_rate;
26912 /* Nothing to do if ready list contains only 1 instruction. */
26913 if (n_ready <= 1)
26914 return issue_rate;
26916 /* Do reordering for the post-reload scheduler only. */
26917 if (!reload_completed)
26918 return issue_rate;
26920 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26922 if (sched_verbose > 1)
26923 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26924 INSN_UID (ready[index]));
26926 /* Put IMUL producer (ready[index]) at the top of ready list. */
26927 insn = ready[index];
26928 for (i = index; i < n_ready - 1; i++)
26929 ready[i] = ready[i + 1];
26930 ready[n_ready - 1] = insn;
26931 return issue_rate;
26934 /* Skip selective scheduling since HID is not populated in it. */
26935 if (clock_var != 0
26936 && !sel_sched_p ()
26937 && swap_top_of_ready_list (ready, n_ready))
26939 if (sched_verbose > 1)
26940 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26941 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26942 /* Swap 2 top elements of ready list. */
26943 insn = ready[n_ready - 1];
26944 ready[n_ready - 1] = ready[n_ready - 2];
26945 ready[n_ready - 2] = insn;
26947 return issue_rate;
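/* A minimal standalone sketch (not part of this file) of the two
   ready-list manipulations performed above, using an int array as a
   stand-in for the insn array.  The scheduler keeps the "top" of the
   ready list at index n_ready - 1.  */

/* Rotate the element at INDEX up to the top, preserving the relative
   order of everything in between (the IMUL-producer case).  */
static void
sketch_rotate_to_top (int *ready, int n_ready, int index)
{
  int i, tmp = ready[index];
  for (i = index; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];
  ready[n_ready - 1] = tmp;
}

/* Exchange the two top elements (the Silvermont swap case).  */
static void
sketch_swap_top_two (int *ready, int n_ready)
{
  int tmp = ready[n_ready - 1];
  ready[n_ready - 1] = ready[n_ready - 2];
  ready[n_ready - 2] = tmp;
}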
26950 static bool
26951 ix86_class_likely_spilled_p (reg_class_t);
26953 /* Return true if the lhs of insn is a HW function argument register; set
26954 is_spilled to true if it is a likely spilled HW register. */
26955 static bool
26956 insn_is_function_arg (rtx insn, bool* is_spilled)
26958 rtx dst;
26960 if (!NONDEBUG_INSN_P (insn))
26961 return false;
26962 /* Call instructions are not movable; ignore them. */
26963 if (CALL_P (insn))
26964 return false;
26965 insn = PATTERN (insn);
26966 if (GET_CODE (insn) == PARALLEL)
26967 insn = XVECEXP (insn, 0, 0);
26968 if (GET_CODE (insn) != SET)
26969 return false;
26970 dst = SET_DEST (insn);
26971 if (REG_P (dst) && HARD_REGISTER_P (dst)
26972 && ix86_function_arg_regno_p (REGNO (dst)))
26974 /* Is it a likely spilled HW register? */
26975 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26976 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26977 *is_spilled = true;
26978 return true;
26980 return false;
26983 /* Add output dependencies for a chain of adjacent function arguments, but
26984 only if there is a move to a likely spilled HW register. Return the first
26985 argument if at least one dependence was added, or NULL otherwise. */
26986 static rtx_insn *
26987 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26989 rtx_insn *insn;
26990 rtx_insn *last = call;
26991 rtx_insn *first_arg = NULL;
26992 bool is_spilled = false;
26994 head = PREV_INSN (head);
26996 /* Find the argument-passing instruction nearest to the call. */
26997 while (true)
26999 last = PREV_INSN (last);
27000 if (last == head)
27001 return NULL;
27002 if (!NONDEBUG_INSN_P (last))
27003 continue;
27004 if (insn_is_function_arg (last, &is_spilled))
27005 break;
27006 return NULL;
27009 first_arg = last;
27010 while (true)
27012 insn = PREV_INSN (last);
27013 if (!INSN_P (insn))
27014 break;
27015 if (insn == head)
27016 break;
27017 if (!NONDEBUG_INSN_P (insn))
27019 last = insn;
27020 continue;
27022 if (insn_is_function_arg (insn, &is_spilled))
27024 /* Add an output dependence between two function arguments if the chain
27025 of output arguments contains likely spilled HW registers. */
27026 if (is_spilled)
27027 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27028 first_arg = last = insn;
27030 else
27031 break;
27033 if (!is_spilled)
27034 return NULL;
27035 return first_arg;
27038 /* Add output or anti dependency from insn to first_arg to restrict its code
27039 motion. */
27040 static void
27041 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
27043 rtx set;
27044 rtx tmp;
27046 /* Add anti dependencies for bounds stores. */
27047 if (INSN_P (insn)
27048 && GET_CODE (PATTERN (insn)) == PARALLEL
27049 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
27050 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
27052 add_dependence (first_arg, insn, REG_DEP_ANTI);
27053 return;
27056 set = single_set (insn);
27057 if (!set)
27058 return;
27059 tmp = SET_DEST (set);
27060 if (REG_P (tmp))
27062 /* Add output dependency to the first function argument. */
27063 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27064 return;
27066 /* Add anti dependency. */
27067 add_dependence (first_arg, insn, REG_DEP_ANTI);
27070 /* Avoid cross-block motion of a function argument by adding a dependency
27071 from the first non-jump instruction in bb. */
27072 static void
27073 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
27075 rtx_insn *insn = BB_END (bb);
27077 while (insn)
27079 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
27081 rtx set = single_set (insn);
27082 if (set)
27084 avoid_func_arg_motion (arg, insn);
27085 return;
27088 if (insn == BB_HEAD (bb))
27089 return;
27090 insn = PREV_INSN (insn);
27094 /* Hook for pre-reload schedule - avoid motion of function arguments
27095 passed in likely spilled HW registers. */
27096 static void
27097 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
27099 rtx_insn *insn;
27100 rtx_insn *first_arg = NULL;
27101 if (reload_completed)
27102 return;
27103 while (head != tail && DEBUG_INSN_P (head))
27104 head = NEXT_INSN (head);
27105 for (insn = tail; insn != head; insn = PREV_INSN (insn))
27106 if (INSN_P (insn) && CALL_P (insn))
27108 first_arg = add_parameter_dependencies (insn, head);
27109 if (first_arg)
27111 /* Add a dependee for the first argument to predecessors, but only
27112 if the region contains more than one block. */
27113 basic_block bb = BLOCK_FOR_INSN (insn);
27114 int rgn = CONTAINING_RGN (bb->index);
27115 int nr_blks = RGN_NR_BLOCKS (rgn);
27116 /* Skip trivial regions and region head blocks that can have
27117 predecessors outside of the region. */
27118 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27120 edge e;
27121 edge_iterator ei;
27123 /* Regions are SCCs with the exception of selective
27124 scheduling with pipelining of outer blocks enabled.
27125 So also check that immediate predecessors of a non-head
27126 block are in the same region. */
27127 FOR_EACH_EDGE (e, ei, bb->preds)
27129 /* Avoid creating loop-carried dependencies by using the
27130 topological ordering of the region. */
27131 if (rgn == CONTAINING_RGN (e->src->index)
27132 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27133 add_dependee_for_func_arg (first_arg, e->src);
27136 insn = first_arg;
27137 if (insn == head)
27138 break;
27141 else if (first_arg)
27142 avoid_func_arg_motion (first_arg, insn);
27145 /* Hook for the pre-reload scheduler - set the priority of moves from likely
27146 spilled HW registers to the maximum, to schedule them as soon as possible.
27147 These are moves from function argument registers at the top of the function
27148 entry and moves from function return value registers after a call. */
27149 static int
27150 ix86_adjust_priority (rtx_insn *insn, int priority)
27152 rtx set;
27154 if (reload_completed)
27155 return priority;
27157 if (!NONDEBUG_INSN_P (insn))
27158 return priority;
27160 set = single_set (insn);
27161 if (set)
27163 rtx tmp = SET_SRC (set);
27164 if (REG_P (tmp)
27165 && HARD_REGISTER_P (tmp)
27166 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27167 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27168 return current_sched_info->sched_max_insns_priority;
27171 return priority;
27174 /* Model the decoder of Core 2/i7.
27175 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
27176 track the instruction fetch block boundaries and make sure that long
27177 (9+ byte) instructions are assigned to D0. */
27179 /* Maximum length of an insn that can be handled by
27180 a secondary decoder unit. '8' for Core 2/i7. */
27181 static int core2i7_secondary_decoder_max_insn_size;
27183 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27184 '16' for Core 2/i7. */
27185 static int core2i7_ifetch_block_size;
27187 /* Maximum number of instructions decoder can handle per cycle.
27188 '6' for Core 2/i7. */
27189 static int core2i7_ifetch_block_max_insns;
27191 typedef struct ix86_first_cycle_multipass_data_ *
27192 ix86_first_cycle_multipass_data_t;
27193 typedef const struct ix86_first_cycle_multipass_data_ *
27194 const_ix86_first_cycle_multipass_data_t;
27196 /* A variable to store target state across calls to max_issue within
27197 one cycle. */
27198 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27199 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27201 /* Initialize DATA. */
27202 static void
27203 core2i7_first_cycle_multipass_init (void *_data)
27205 ix86_first_cycle_multipass_data_t data
27206 = (ix86_first_cycle_multipass_data_t) _data;
27208 data->ifetch_block_len = 0;
27209 data->ifetch_block_n_insns = 0;
27210 data->ready_try_change = NULL;
27211 data->ready_try_change_size = 0;
27214 /* Advancing the cycle; reset ifetch block counts. */
27215 static void
27216 core2i7_dfa_post_advance_cycle (void)
27218 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27220 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27222 data->ifetch_block_len = 0;
27223 data->ifetch_block_n_insns = 0;
27226 static int min_insn_size (rtx_insn *);
27228 /* Filter out insns from ready_try that the core will not be able to issue
27229 on the current cycle due to decoder restrictions. */
27230 static void
27231 core2i7_first_cycle_multipass_filter_ready_try
27232 (const_ix86_first_cycle_multipass_data_t data,
27233 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27235 while (n_ready--)
27237 rtx_insn *insn;
27238 int insn_size;
27240 if (ready_try[n_ready])
27241 continue;
27243 insn = get_ready_element (n_ready);
27244 insn_size = min_insn_size (insn);
27246 if (/* If this is too long an insn for a secondary decoder ... */
27247 (!first_cycle_insn_p
27248 && insn_size > core2i7_secondary_decoder_max_insn_size)
27249 /* ... or it would not fit into the ifetch block ... */
27250 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27251 /* ... or the decoder is full already ... */
27252 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27253 /* ... mask the insn out. */
27255 ready_try[n_ready] = 1;
27257 if (data->ready_try_change)
27258 bitmap_set_bit (data->ready_try_change, n_ready);
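/* A minimal standalone sketch (not part of this file) of the decoder
   admissibility test above, with the Core 2/i7 parameters written as
   constants: secondary decoders take insns of at most 8 bytes, the
   ifetch block is 16 bytes, and at most 6 insns decode per cycle.  */

static int
sketch_decoder_can_issue (int insn_size, int block_len,
			  int block_n_insns, int first_cycle_insn_p)
{
  /* Only the first decoder (D0) handles long insns.  */
  if (!first_cycle_insn_p && insn_size > 8)
    return 0;
  /* The insn must fit into what remains of the 16-byte ifetch block.  */
  if (block_len + insn_size > 16)
    return 0;
  /* The decoders handle at most 6 insns per cycle.  */
  if (block_n_insns + 1 > 6)
    return 0;
  return 1;
}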
27263 /* Prepare for a new round of multipass lookahead scheduling. */
27264 static void
27265 core2i7_first_cycle_multipass_begin (void *_data,
27266 signed char *ready_try, int n_ready,
27267 bool first_cycle_insn_p)
27269 ix86_first_cycle_multipass_data_t data
27270 = (ix86_first_cycle_multipass_data_t) _data;
27271 const_ix86_first_cycle_multipass_data_t prev_data
27272 = ix86_first_cycle_multipass_data;
27274 /* Restore the state from the end of the previous round. */
27275 data->ifetch_block_len = prev_data->ifetch_block_len;
27276 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27278 /* Filter instructions that cannot be issued on current cycle due to
27279 decoder restrictions. */
27280 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27281 first_cycle_insn_p);
27284 /* INSN is being issued in the current solution. Account for its impact on
27285 the decoder model. */
27286 static void
27287 core2i7_first_cycle_multipass_issue (void *_data,
27288 signed char *ready_try, int n_ready,
27289 rtx_insn *insn, const void *_prev_data)
27291 ix86_first_cycle_multipass_data_t data
27292 = (ix86_first_cycle_multipass_data_t) _data;
27293 const_ix86_first_cycle_multipass_data_t prev_data
27294 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27296 int insn_size = min_insn_size (insn);
27298 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27299 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27300 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27301 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27303 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27304 if (!data->ready_try_change)
27306 data->ready_try_change = sbitmap_alloc (n_ready);
27307 data->ready_try_change_size = n_ready;
27309 else if (data->ready_try_change_size < n_ready)
27311 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27312 n_ready, 0);
27313 data->ready_try_change_size = n_ready;
27315 bitmap_clear (data->ready_try_change);
27317 /* Filter out insns from ready_try that the core will not be able to issue
27318 on the current cycle due to decoder restrictions. */
27319 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27320 false);
27323 /* Revert the effect on ready_try. */
27324 static void
27325 core2i7_first_cycle_multipass_backtrack (const void *_data,
27326 signed char *ready_try,
27327 int n_ready ATTRIBUTE_UNUSED)
27329 const_ix86_first_cycle_multipass_data_t data
27330 = (const_ix86_first_cycle_multipass_data_t) _data;
27331 unsigned int i = 0;
27332 sbitmap_iterator sbi;
27334 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27335 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27337 ready_try[i] = 0;
27341 /* Save the result of multipass lookahead scheduling for the next round. */
27342 static void
27343 core2i7_first_cycle_multipass_end (const void *_data)
27345 const_ix86_first_cycle_multipass_data_t data
27346 = (const_ix86_first_cycle_multipass_data_t) _data;
27347 ix86_first_cycle_multipass_data_t next_data
27348 = ix86_first_cycle_multipass_data;
27350 if (data != NULL)
27352 next_data->ifetch_block_len = data->ifetch_block_len;
27353 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27357 /* Deallocate target data. */
27358 static void
27359 core2i7_first_cycle_multipass_fini (void *_data)
27361 ix86_first_cycle_multipass_data_t data
27362 = (ix86_first_cycle_multipass_data_t) _data;
27364 if (data->ready_try_change)
27366 sbitmap_free (data->ready_try_change);
27367 data->ready_try_change = NULL;
27368 data->ready_try_change_size = 0;
27372 /* Prepare for scheduling pass. */
27373 static void
27374 ix86_sched_init_global (FILE *, int, int)
27376 /* Install scheduling hooks for current CPU. Some of these hooks are used
27377 in time-critical parts of the scheduler, so we only set them up when
27378 they are actually used. */
27379 switch (ix86_tune)
27381 case PROCESSOR_CORE2:
27382 case PROCESSOR_NEHALEM:
27383 case PROCESSOR_SANDYBRIDGE:
27384 case PROCESSOR_HASWELL:
27385 /* Do not perform multipass scheduling for pre-reload schedule
27386 to save compile time. */
27387 if (reload_completed)
27389 targetm.sched.dfa_post_advance_cycle
27390 = core2i7_dfa_post_advance_cycle;
27391 targetm.sched.first_cycle_multipass_init
27392 = core2i7_first_cycle_multipass_init;
27393 targetm.sched.first_cycle_multipass_begin
27394 = core2i7_first_cycle_multipass_begin;
27395 targetm.sched.first_cycle_multipass_issue
27396 = core2i7_first_cycle_multipass_issue;
27397 targetm.sched.first_cycle_multipass_backtrack
27398 = core2i7_first_cycle_multipass_backtrack;
27399 targetm.sched.first_cycle_multipass_end
27400 = core2i7_first_cycle_multipass_end;
27401 targetm.sched.first_cycle_multipass_fini
27402 = core2i7_first_cycle_multipass_fini;
27404 /* Set decoder parameters. */
27405 core2i7_secondary_decoder_max_insn_size = 8;
27406 core2i7_ifetch_block_size = 16;
27407 core2i7_ifetch_block_max_insns = 6;
27408 break;
27410 /* ... Fall through ... */
27411 default:
27412 targetm.sched.dfa_post_advance_cycle = NULL;
27413 targetm.sched.first_cycle_multipass_init = NULL;
27414 targetm.sched.first_cycle_multipass_begin = NULL;
27415 targetm.sched.first_cycle_multipass_issue = NULL;
27416 targetm.sched.first_cycle_multipass_backtrack = NULL;
27417 targetm.sched.first_cycle_multipass_end = NULL;
27418 targetm.sched.first_cycle_multipass_fini = NULL;
27419 break;
27424 /* Compute the alignment given to a constant that is being placed in memory.
27425 EXP is the constant and ALIGN is the alignment that the object would
27426 ordinarily have.
27427 The value of this function is used instead of that alignment to align
27428 the object. */
27430 int
27431 ix86_constant_alignment (tree exp, int align)
27433 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27434 || TREE_CODE (exp) == INTEGER_CST)
27436 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27437 return 64;
27438 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27439 return 128;
27441 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27442 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27443 return BITS_PER_WORD;
27445 return align;
27448 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
27449 the data type, and ALIGN is the alignment that the object would
27450 ordinarily have. */
27452 static int
27453 iamcu_alignment (tree type, int align)
27455 enum machine_mode mode;
27457 if (align < 32 || TYPE_USER_ALIGN (type))
27458 return align;
27460 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
27461 aligned to 4 bytes. */
27462 mode = TYPE_MODE (strip_array_types (type));
27463 switch (GET_MODE_CLASS (mode))
27465 case MODE_INT:
27466 case MODE_COMPLEX_INT:
27467 case MODE_COMPLEX_FLOAT:
27468 case MODE_FLOAT:
27469 case MODE_DECIMAL_FLOAT:
27470 return 32;
27471 default:
27472 return align;
27476 /* Compute the alignment for a static variable.
27477 TYPE is the data type, and ALIGN is the alignment that
27478 the object would ordinarily have. The value of this function is used
27479 instead of that alignment to align the object. */
27481 int
27482 ix86_data_alignment (tree type, int align, bool opt)
27484 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27485 for symbols from other compilation units or symbols that don't need
27486 to bind locally. In order to preserve some ABI compatibility with
27487 those compilers, ensure we don't decrease alignment from what we
27488 used to assume. */
27490 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27492 /* A data structure equal to or greater than the size of a cache line
27493 (64 bytes in the Pentium 4 and other recent Intel processors, including
27494 processors based on Intel Core microarchitecture) should be aligned
27495 so that its base address is a multiple of the cache line size. */
27497 int max_align
27498 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27500 if (max_align < BITS_PER_WORD)
27501 max_align = BITS_PER_WORD;
27503 switch (ix86_align_data_type)
27505 case ix86_align_data_type_abi: opt = false; break;
27506 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27507 case ix86_align_data_type_cacheline: break;
27510 if (TARGET_IAMCU)
27511 align = iamcu_alignment (type, align);
27513 if (opt
27514 && AGGREGATE_TYPE_P (type)
27515 && TYPE_SIZE (type)
27516 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27518 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27519 && align < max_align_compat)
27520 align = max_align_compat;
27521 if (wi::geu_p (TYPE_SIZE (type), max_align)
27522 && align < max_align)
27523 align = max_align;
27526 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
27527 to a 16-byte boundary. */
27528 if (TARGET_64BIT)
27530 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27531 && TYPE_SIZE (type)
27532 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27533 && wi::geu_p (TYPE_SIZE (type), 128)
27534 && align < 128)
27535 return 128;
27538 if (!opt)
27539 return align;
27541 if (TREE_CODE (type) == ARRAY_TYPE)
27543 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27544 return 64;
27545 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27546 return 128;
27548 else if (TREE_CODE (type) == COMPLEX_TYPE)
27551 if (TYPE_MODE (type) == DCmode && align < 64)
27552 return 64;
27553 if ((TYPE_MODE (type) == XCmode
27554 || TYPE_MODE (type) == TCmode) && align < 128)
27555 return 128;
27557 else if ((TREE_CODE (type) == RECORD_TYPE
27558 || TREE_CODE (type) == UNION_TYPE
27559 || TREE_CODE (type) == QUAL_UNION_TYPE)
27560 && TYPE_FIELDS (type))
27562 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27563 return 64;
27564 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27565 return 128;
27567 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27568 || TREE_CODE (type) == INTEGER_TYPE)
27570 if (TYPE_MODE (type) == DFmode && align < 64)
27571 return 64;
27572 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27573 return 128;
27576 return align;
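/* A minimal standalone sketch (not part of this file) of the x86-64
   ABI array rule applied above, reduced to plain arithmetic.  Both
   arguments are in bits, as in the hook itself.  */

static int
sketch_x86_64_array_align (unsigned long long size_bits, int align)
{
  /* An array of at least 16 bytes (128 bits) is aligned to a 16-byte
     boundary, unless it already has a larger alignment.  */
  if (size_bits >= 128 && align < 128)
    return 128;
  return align;
}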
27579 /* Compute the alignment for a local variable or a stack slot. EXP is
27580 the data type or decl itself, MODE is the widest mode available and
27581 ALIGN is the alignment that the object would ordinarily have. The
27582 value of this macro is used instead of that alignment to align the
27583 object. */
27585 unsigned int
27586 ix86_local_alignment (tree exp, machine_mode mode,
27587 unsigned int align)
27589 tree type, decl;
27591 if (exp && DECL_P (exp))
27593 type = TREE_TYPE (exp);
27594 decl = exp;
27596 else
27598 type = exp;
27599 decl = NULL;
27602 /* Don't do dynamic stack realignment for long long objects with
27603 -mpreferred-stack-boundary=2. */
27604 if (!TARGET_64BIT
27605 && align == 64
27606 && ix86_preferred_stack_boundary < 64
27607 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27608 && (!type || !TYPE_USER_ALIGN (type))
27609 && (!decl || !DECL_USER_ALIGN (decl)))
27610 align = 32;
27612 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
27613 register in MODE. Return the larger of the XFmode and DFmode
27614 alignments. */
27615 if (!type)
27617 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27618 align = GET_MODE_ALIGNMENT (DFmode);
27619 return align;
27622 /* Don't increase alignment for Intel MCU psABI. */
27623 if (TARGET_IAMCU)
27624 return align;
27626 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
27627 to a 16-byte boundary. The exact wording is:
27629 An array uses the same alignment as its elements, except that a local or
27630 global array variable of length at least 16 bytes or
27631 a C99 variable-length array variable always has alignment of at least 16 bytes.
27633 This was added to allow use of aligned SSE instructions on arrays. The
27634 rule is meant for static storage (where the compiler cannot do the
27635 analysis by itself). We follow it for automatic variables only when
27636 convenient. We fully control everything in the function being compiled,
27637 and functions from other units cannot rely on the alignment.
27639 Exclude the va_list type. It is the common case of a local array where
27640 we cannot benefit from the alignment.
27642 TODO: Probably one should optimize for size only when var is not escaping. */
27643 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27644 && TARGET_SSE)
27646 if (AGGREGATE_TYPE_P (type)
27647 && (va_list_type_node == NULL_TREE
27648 || (TYPE_MAIN_VARIANT (type)
27649 != TYPE_MAIN_VARIANT (va_list_type_node)))
27650 && TYPE_SIZE (type)
27651 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27652 && wi::geu_p (TYPE_SIZE (type), 16)
27653 && align < 128)
27654 return 128;
27656 if (TREE_CODE (type) == ARRAY_TYPE)
27658 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27659 return 64;
27660 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27661 return 128;
27663 else if (TREE_CODE (type) == COMPLEX_TYPE)
27665 if (TYPE_MODE (type) == DCmode && align < 64)
27666 return 64;
27667 if ((TYPE_MODE (type) == XCmode
27668 || TYPE_MODE (type) == TCmode) && align < 128)
27669 return 128;
27671 else if ((TREE_CODE (type) == RECORD_TYPE
27672 || TREE_CODE (type) == UNION_TYPE
27673 || TREE_CODE (type) == QUAL_UNION_TYPE)
27674 && TYPE_FIELDS (type))
27676 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27677 return 64;
27678 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27679 return 128;
27681 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27682 || TREE_CODE (type) == INTEGER_TYPE)
27685 if (TYPE_MODE (type) == DFmode && align < 64)
27686 return 64;
27687 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27688 return 128;
27690 return align;
27693 /* Compute the minimum required alignment for dynamic stack realignment
27694 purposes for a local variable, parameter or a stack slot. EXP is
27695 the data type or decl itself, MODE is its mode and ALIGN is the
27696 alignment that the object would ordinarily have. */
27698 unsigned int
27699 ix86_minimum_alignment (tree exp, machine_mode mode,
27700 unsigned int align)
27702 tree type, decl;
27704 if (exp && DECL_P (exp))
27706 type = TREE_TYPE (exp);
27707 decl = exp;
27709 else
27711 type = exp;
27712 decl = NULL;
27715 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27716 return align;
27718 /* Don't do dynamic stack realignment for long long objects with
27719 -mpreferred-stack-boundary=2. */
27720 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27721 && (!type || !TYPE_USER_ALIGN (type))
27722 && (!decl || !DECL_USER_ALIGN (decl)))
27723 return 32;
27725 return align;
27728 /* Find a location for the static chain incoming to a nested function.
27729 This is a register, unless all free registers are used by arguments. */
27731 static rtx
27732 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27734 unsigned regno;
27736 /* While this function won't be called by the middle-end when a static
27737 chain isn't needed, it's also used throughout the backend so it's
27738 easiest to keep this check centralized. */
27739 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27740 return NULL;
27742 if (TARGET_64BIT)
27744 /* We always use R10 in 64-bit mode. */
27745 regno = R10_REG;
27747 else
27749 const_tree fntype, fndecl;
27750 unsigned int ccvt;
27752 /* By default in 32-bit mode we use ECX to pass the static chain. */
27753 regno = CX_REG;
27755 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27757 fntype = TREE_TYPE (fndecl_or_type);
27758 fndecl = fndecl_or_type;
27760 else
27762 fntype = fndecl_or_type;
27763 fndecl = NULL;
27766 ccvt = ix86_get_callcvt (fntype);
27767 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27769 /* Fastcall functions use ecx/edx for arguments, which leaves
27770 us with EAX for the static chain.
27771 Thiscall functions use ecx for arguments, which also
27772 leaves us with EAX for the static chain. */
27773 regno = AX_REG;
27775 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27777 /* Thiscall functions use ecx for arguments, which leaves
27778 us with EAX and EDX for the static chain.
27779 We use EAX for ABI compatibility. */
27780 regno = AX_REG;
27782 else if (ix86_function_regparm (fntype, fndecl) == 3)
27784 /* For regparm 3, we have no free call-clobbered registers in
27785 which to store the static chain. In order to implement this,
27786 we have the trampoline push the static chain to the stack.
27787 However, we can't push a value below the return address when
27788 we call the nested function directly, so we have to use an
27789 alternate entry point. For this we use ESI, and have the
27790 alternate entry point push ESI, so that things appear the
27791 same once we're executing the nested function. */
27792 if (incoming_p)
27794 if (fndecl == current_function_decl)
27795 ix86_static_chain_on_stack = true;
27796 return gen_frame_mem (SImode,
27797 plus_constant (Pmode,
27798 arg_pointer_rtx, -8));
27800 regno = SI_REG;
27804 return gen_rtx_REG (Pmode, regno);
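/* A minimal standalone sketch (not part of this file) of the 32-bit
   static-chain choice made above, as a decision table.  The
   sketch_callcvt values are illustrative stand-ins for the calling
   convention bits the hook inspects.  */

enum sketch_callcvt
{
  SKETCH_CDECL, SKETCH_FASTCALL, SKETCH_THISCALL, SKETCH_REGPARM3
};

static const char *
sketch_static_chain_loc (enum sketch_callcvt cvt)
{
  switch (cvt)
    {
    case SKETCH_FASTCALL:	/* ecx/edx carry args; eax is free.  */
    case SKETCH_THISCALL:	/* ecx carries `this'; eax for ABI compat.  */
      return "eax";
    case SKETCH_REGPARM3:	/* eax/ecx/edx all carry args; the
				   trampoline pushes the chain instead.  */
      return "stack";
    default:			/* Plain cdecl: ecx is free.  */
      return "ecx";
    }
}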
27807 /* Emit RTL insns to initialize the variable parts of a trampoline.
27808 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27809 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27810 to be passed to the target function. */
27812 static void
27813 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27815 rtx mem, fnaddr;
27816 int opcode;
27817 int offset = 0;
27819 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27821 if (TARGET_64BIT)
27823 int size;
27825 /* Load the function address into r11. Try to load the address using
27826 the shorter movl instead of movabs. We may want to support
27827 movq for kernel mode, but the kernel does not use trampolines at
27828 the moment. FNADDR is a 32-bit address and may not be in
27829 DImode when ptr_mode == SImode. Always use movl in this
27830 case. */
27831 if (ptr_mode == SImode
27832 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27834 fnaddr = copy_addr_to_reg (fnaddr);
27836 mem = adjust_address (m_tramp, HImode, offset);
27837 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27839 mem = adjust_address (m_tramp, SImode, offset + 2);
27840 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27841 offset += 6;
27843 else
27845 mem = adjust_address (m_tramp, HImode, offset);
27846 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27848 mem = adjust_address (m_tramp, DImode, offset + 2);
27849 emit_move_insn (mem, fnaddr);
27850 offset += 10;
27853 /* Load the static chain into r10 using movabs. Use the shorter movl
27854 instead of movabs when ptr_mode == SImode. */
27855 if (ptr_mode == SImode)
27857 opcode = 0xba41;
27858 size = 6;
27860 else
27862 opcode = 0xba49;
27863 size = 10;
27866 mem = adjust_address (m_tramp, HImode, offset);
27867 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27869 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27870 emit_move_insn (mem, chain_value);
27871 offset += size;
27873 /* Jump to r11; the last (unused) byte is a nop, only there to
27874 pad the write out to a single 32-bit store. */
27875 mem = adjust_address (m_tramp, SImode, offset);
27876 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27877 offset += 4;
27879 else
27881 rtx disp, chain;
27883 /* Depending on the static chain location, either load a register
27884 with a constant, or push the constant to the stack. All of the
27885 instructions are the same size. */
27886 chain = ix86_static_chain (fndecl, true);
27887 if (REG_P (chain))
27889 switch (REGNO (chain))
27891 case AX_REG:
27892 opcode = 0xb8; break;
27893 case CX_REG:
27894 opcode = 0xb9; break;
27895 default:
27896 gcc_unreachable ();
27899 else
27900 opcode = 0x68;
27902 mem = adjust_address (m_tramp, QImode, offset);
27903 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27905 mem = adjust_address (m_tramp, SImode, offset + 1);
27906 emit_move_insn (mem, chain_value);
27907 offset += 5;
27909 mem = adjust_address (m_tramp, QImode, offset);
27910 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27912 mem = adjust_address (m_tramp, SImode, offset + 1);
27914 /* Compute offset from the end of the jmp to the target function.
27915 In the case in which the trampoline stores the static chain on
27916 the stack, we need to skip the first insn which pushes the
27917 (call-saved) register static chain; this push is 1 byte. */
27918 offset += 5;
27919 disp = expand_binop (SImode, sub_optab, fnaddr,
27920 plus_constant (Pmode, XEXP (m_tramp, 0),
27921 offset - (MEM_P (chain) ? 1 : 0)),
27922 NULL_RTX, 1, OPTAB_DIRECT);
27923 emit_move_insn (mem, disp);
27926 gcc_assert (offset <= TRAMPOLINE_SIZE);
27928 #ifdef HAVE_ENABLE_EXECUTE_STACK
27929 #ifdef CHECK_EXECUTE_STACK_ENABLED
27930 if (CHECK_EXECUTE_STACK_ENABLED)
27931 #endif
27932 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27933 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27934 #endif
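/* A minimal standalone sketch (not part of this file) of the 64-bit
   trampoline body emitted above, in its long movabs form, written into
   a plain byte buffer.  The immediates are stored byte by byte in
   little-endian order, matching the x86 target.  Returns the number of
   bytes written (24).  */

static int
sketch_emit_tramp64 (unsigned char *buf,
		     unsigned long long fnaddr, unsigned long long chain)
{
  int off = 0, i;
  /* movabs $fnaddr, %r11  -> 49 bb imm64  */
  buf[off++] = 0x49; buf[off++] = 0xbb;
  for (i = 0; i < 8; i++)
    buf[off++] = (fnaddr >> (8 * i)) & 0xff;
  /* movabs $chain, %r10   -> 49 ba imm64  */
  buf[off++] = 0x49; buf[off++] = 0xba;
  for (i = 0; i < 8; i++)
    buf[off++] = (chain >> (8 * i)) & 0xff;
  /* jmp *%r11; nop        -> 49 ff e3 90; the nop pads the final
     write out to a full 32-bit store, as noted above.  */
  buf[off++] = 0x49; buf[off++] = 0xff;
  buf[off++] = 0xe3; buf[off++] = 0x90;
  return off;
}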
27937 /* The following file contains several enumerations and data structures
27938 built from the definitions in i386-builtin-types.def. */
27940 #include "i386-builtin-types.inc"
27942 /* Table for the ix86 builtin non-function types. */
27943 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27945 /* Retrieve an element from the above table, building some of
27946 the types lazily. */
27948 static tree
27949 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27951 unsigned int index;
27952 tree type, itype;
27954 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27956 type = ix86_builtin_type_tab[(int) tcode];
27957 if (type != NULL)
27958 return type;
27960 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27961 if (tcode <= IX86_BT_LAST_VECT)
27963 machine_mode mode;
27965 index = tcode - IX86_BT_LAST_PRIM - 1;
27966 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27967 mode = ix86_builtin_type_vect_mode[index];
27969 type = build_vector_type_for_mode (itype, mode);
27971 else
27973 int quals;
27975 index = tcode - IX86_BT_LAST_VECT - 1;
27976 if (tcode <= IX86_BT_LAST_PTR)
27977 quals = TYPE_UNQUALIFIED;
27978 else
27979 quals = TYPE_QUAL_CONST;
27981 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27982 if (quals != TYPE_UNQUALIFIED)
27983 itype = build_qualified_type (itype, quals);
27985 type = build_pointer_type (itype);
27988 ix86_builtin_type_tab[(int) tcode] = type;
27989 return type;
27992 /* Table for the ix86 builtin function types. */
27993 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27995 /* Retrieve an element from the above table, building some of
27996 the types lazily. */
27998 static tree
27999 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
28001 tree type;
28003 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
28005 type = ix86_builtin_func_type_tab[(int) tcode];
28006 if (type != NULL)
28007 return type;
28009 if (tcode <= IX86_BT_LAST_FUNC)
28011 unsigned start = ix86_builtin_func_start[(int) tcode];
28012 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
28013 tree rtype, atype, args = void_list_node;
28014 unsigned i;
28016 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
28017 for (i = after - 1; i > start; --i)
28019 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
28020 args = tree_cons (NULL, atype, args);
28023 type = build_function_type (rtype, args);
28025 else
28027 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
28028 enum ix86_builtin_func_type icode;
28030 icode = ix86_builtin_func_alias_base[index];
28031 type = ix86_get_builtin_func_type (icode);
28034 ix86_builtin_func_type_tab[(int) tcode] = type;
28035 return type;
28039 /* Codes for all the SSE/MMX builtins. */
28040 enum ix86_builtins
28042 IX86_BUILTIN_ADDPS,
28043 IX86_BUILTIN_ADDSS,
28044 IX86_BUILTIN_DIVPS,
28045 IX86_BUILTIN_DIVSS,
28046 IX86_BUILTIN_MULPS,
28047 IX86_BUILTIN_MULSS,
28048 IX86_BUILTIN_SUBPS,
28049 IX86_BUILTIN_SUBSS,
28051 IX86_BUILTIN_CMPEQPS,
28052 IX86_BUILTIN_CMPLTPS,
28053 IX86_BUILTIN_CMPLEPS,
28054 IX86_BUILTIN_CMPGTPS,
28055 IX86_BUILTIN_CMPGEPS,
28056 IX86_BUILTIN_CMPNEQPS,
28057 IX86_BUILTIN_CMPNLTPS,
28058 IX86_BUILTIN_CMPNLEPS,
28059 IX86_BUILTIN_CMPNGTPS,
28060 IX86_BUILTIN_CMPNGEPS,
28061 IX86_BUILTIN_CMPORDPS,
28062 IX86_BUILTIN_CMPUNORDPS,
28063 IX86_BUILTIN_CMPEQSS,
28064 IX86_BUILTIN_CMPLTSS,
28065 IX86_BUILTIN_CMPLESS,
28066 IX86_BUILTIN_CMPNEQSS,
28067 IX86_BUILTIN_CMPNLTSS,
28068 IX86_BUILTIN_CMPNLESS,
28069 IX86_BUILTIN_CMPORDSS,
28070 IX86_BUILTIN_CMPUNORDSS,
28072 IX86_BUILTIN_COMIEQSS,
28073 IX86_BUILTIN_COMILTSS,
28074 IX86_BUILTIN_COMILESS,
28075 IX86_BUILTIN_COMIGTSS,
28076 IX86_BUILTIN_COMIGESS,
28077 IX86_BUILTIN_COMINEQSS,
28078 IX86_BUILTIN_UCOMIEQSS,
28079 IX86_BUILTIN_UCOMILTSS,
28080 IX86_BUILTIN_UCOMILESS,
28081 IX86_BUILTIN_UCOMIGTSS,
28082 IX86_BUILTIN_UCOMIGESS,
28083 IX86_BUILTIN_UCOMINEQSS,
28085 IX86_BUILTIN_CVTPI2PS,
28086 IX86_BUILTIN_CVTPS2PI,
28087 IX86_BUILTIN_CVTSI2SS,
28088 IX86_BUILTIN_CVTSI642SS,
28089 IX86_BUILTIN_CVTSS2SI,
28090 IX86_BUILTIN_CVTSS2SI64,
28091 IX86_BUILTIN_CVTTPS2PI,
28092 IX86_BUILTIN_CVTTSS2SI,
28093 IX86_BUILTIN_CVTTSS2SI64,
28095 IX86_BUILTIN_MAXPS,
28096 IX86_BUILTIN_MAXSS,
28097 IX86_BUILTIN_MINPS,
28098 IX86_BUILTIN_MINSS,
28100 IX86_BUILTIN_LOADUPS,
28101 IX86_BUILTIN_STOREUPS,
28102 IX86_BUILTIN_MOVSS,
28104 IX86_BUILTIN_MOVHLPS,
28105 IX86_BUILTIN_MOVLHPS,
28106 IX86_BUILTIN_LOADHPS,
28107 IX86_BUILTIN_LOADLPS,
28108 IX86_BUILTIN_STOREHPS,
28109 IX86_BUILTIN_STORELPS,
28111 IX86_BUILTIN_MASKMOVQ,
28112 IX86_BUILTIN_MOVMSKPS,
28113 IX86_BUILTIN_PMOVMSKB,
28115 IX86_BUILTIN_MOVNTPS,
28116 IX86_BUILTIN_MOVNTQ,
28118 IX86_BUILTIN_LOADDQU,
28119 IX86_BUILTIN_STOREDQU,
28121 IX86_BUILTIN_PACKSSWB,
28122 IX86_BUILTIN_PACKSSDW,
28123 IX86_BUILTIN_PACKUSWB,
28125 IX86_BUILTIN_PADDB,
28126 IX86_BUILTIN_PADDW,
28127 IX86_BUILTIN_PADDD,
28128 IX86_BUILTIN_PADDQ,
28129 IX86_BUILTIN_PADDSB,
28130 IX86_BUILTIN_PADDSW,
28131 IX86_BUILTIN_PADDUSB,
28132 IX86_BUILTIN_PADDUSW,
28133 IX86_BUILTIN_PSUBB,
28134 IX86_BUILTIN_PSUBW,
28135 IX86_BUILTIN_PSUBD,
28136 IX86_BUILTIN_PSUBQ,
28137 IX86_BUILTIN_PSUBSB,
28138 IX86_BUILTIN_PSUBSW,
28139 IX86_BUILTIN_PSUBUSB,
28140 IX86_BUILTIN_PSUBUSW,
28142 IX86_BUILTIN_PAND,
28143 IX86_BUILTIN_PANDN,
28144 IX86_BUILTIN_POR,
28145 IX86_BUILTIN_PXOR,
28147 IX86_BUILTIN_PAVGB,
28148 IX86_BUILTIN_PAVGW,
28150 IX86_BUILTIN_PCMPEQB,
28151 IX86_BUILTIN_PCMPEQW,
28152 IX86_BUILTIN_PCMPEQD,
28153 IX86_BUILTIN_PCMPGTB,
28154 IX86_BUILTIN_PCMPGTW,
28155 IX86_BUILTIN_PCMPGTD,
28157 IX86_BUILTIN_PMADDWD,
28159 IX86_BUILTIN_PMAXSW,
28160 IX86_BUILTIN_PMAXUB,
28161 IX86_BUILTIN_PMINSW,
28162 IX86_BUILTIN_PMINUB,
28164 IX86_BUILTIN_PMULHUW,
28165 IX86_BUILTIN_PMULHW,
28166 IX86_BUILTIN_PMULLW,
28168 IX86_BUILTIN_PSADBW,
28169 IX86_BUILTIN_PSHUFW,
28171 IX86_BUILTIN_PSLLW,
28172 IX86_BUILTIN_PSLLD,
28173 IX86_BUILTIN_PSLLQ,
28174 IX86_BUILTIN_PSRAW,
28175 IX86_BUILTIN_PSRAD,
28176 IX86_BUILTIN_PSRLW,
28177 IX86_BUILTIN_PSRLD,
28178 IX86_BUILTIN_PSRLQ,
28179 IX86_BUILTIN_PSLLWI,
28180 IX86_BUILTIN_PSLLDI,
28181 IX86_BUILTIN_PSLLQI,
28182 IX86_BUILTIN_PSRAWI,
28183 IX86_BUILTIN_PSRADI,
28184 IX86_BUILTIN_PSRLWI,
28185 IX86_BUILTIN_PSRLDI,
28186 IX86_BUILTIN_PSRLQI,
28188 IX86_BUILTIN_PUNPCKHBW,
28189 IX86_BUILTIN_PUNPCKHWD,
28190 IX86_BUILTIN_PUNPCKHDQ,
28191 IX86_BUILTIN_PUNPCKLBW,
28192 IX86_BUILTIN_PUNPCKLWD,
28193 IX86_BUILTIN_PUNPCKLDQ,
28195 IX86_BUILTIN_SHUFPS,
28197 IX86_BUILTIN_RCPPS,
28198 IX86_BUILTIN_RCPSS,
28199 IX86_BUILTIN_RSQRTPS,
28200 IX86_BUILTIN_RSQRTPS_NR,
28201 IX86_BUILTIN_RSQRTSS,
28202 IX86_BUILTIN_RSQRTF,
28203 IX86_BUILTIN_SQRTPS,
28204 IX86_BUILTIN_SQRTPS_NR,
28205 IX86_BUILTIN_SQRTSS,
28207 IX86_BUILTIN_UNPCKHPS,
28208 IX86_BUILTIN_UNPCKLPS,
28210 IX86_BUILTIN_ANDPS,
28211 IX86_BUILTIN_ANDNPS,
28212 IX86_BUILTIN_ORPS,
28213 IX86_BUILTIN_XORPS,
28215 IX86_BUILTIN_EMMS,
28216 IX86_BUILTIN_LDMXCSR,
28217 IX86_BUILTIN_STMXCSR,
28218 IX86_BUILTIN_SFENCE,
28220 IX86_BUILTIN_FXSAVE,
28221 IX86_BUILTIN_FXRSTOR,
28222 IX86_BUILTIN_FXSAVE64,
28223 IX86_BUILTIN_FXRSTOR64,
28225 IX86_BUILTIN_XSAVE,
28226 IX86_BUILTIN_XRSTOR,
28227 IX86_BUILTIN_XSAVE64,
28228 IX86_BUILTIN_XRSTOR64,
28230 IX86_BUILTIN_XSAVEOPT,
28231 IX86_BUILTIN_XSAVEOPT64,
28233 IX86_BUILTIN_XSAVEC,
28234 IX86_BUILTIN_XSAVEC64,
28236 IX86_BUILTIN_XSAVES,
28237 IX86_BUILTIN_XRSTORS,
28238 IX86_BUILTIN_XSAVES64,
28239 IX86_BUILTIN_XRSTORS64,
28241 /* 3DNow! Original */
28242 IX86_BUILTIN_FEMMS,
28243 IX86_BUILTIN_PAVGUSB,
28244 IX86_BUILTIN_PF2ID,
28245 IX86_BUILTIN_PFACC,
28246 IX86_BUILTIN_PFADD,
28247 IX86_BUILTIN_PFCMPEQ,
28248 IX86_BUILTIN_PFCMPGE,
28249 IX86_BUILTIN_PFCMPGT,
28250 IX86_BUILTIN_PFMAX,
28251 IX86_BUILTIN_PFMIN,
28252 IX86_BUILTIN_PFMUL,
28253 IX86_BUILTIN_PFRCP,
28254 IX86_BUILTIN_PFRCPIT1,
28255 IX86_BUILTIN_PFRCPIT2,
28256 IX86_BUILTIN_PFRSQIT1,
28257 IX86_BUILTIN_PFRSQRT,
28258 IX86_BUILTIN_PFSUB,
28259 IX86_BUILTIN_PFSUBR,
28260 IX86_BUILTIN_PI2FD,
28261 IX86_BUILTIN_PMULHRW,
28263 /* 3DNow! Athlon Extensions */
28264 IX86_BUILTIN_PF2IW,
28265 IX86_BUILTIN_PFNACC,
28266 IX86_BUILTIN_PFPNACC,
28267 IX86_BUILTIN_PI2FW,
28268 IX86_BUILTIN_PSWAPDSI,
28269 IX86_BUILTIN_PSWAPDSF,
28271 /* SSE2 */
28272 IX86_BUILTIN_ADDPD,
28273 IX86_BUILTIN_ADDSD,
28274 IX86_BUILTIN_DIVPD,
28275 IX86_BUILTIN_DIVSD,
28276 IX86_BUILTIN_MULPD,
28277 IX86_BUILTIN_MULSD,
28278 IX86_BUILTIN_SUBPD,
28279 IX86_BUILTIN_SUBSD,
28281 IX86_BUILTIN_CMPEQPD,
28282 IX86_BUILTIN_CMPLTPD,
28283 IX86_BUILTIN_CMPLEPD,
28284 IX86_BUILTIN_CMPGTPD,
28285 IX86_BUILTIN_CMPGEPD,
28286 IX86_BUILTIN_CMPNEQPD,
28287 IX86_BUILTIN_CMPNLTPD,
28288 IX86_BUILTIN_CMPNLEPD,
28289 IX86_BUILTIN_CMPNGTPD,
28290 IX86_BUILTIN_CMPNGEPD,
28291 IX86_BUILTIN_CMPORDPD,
28292 IX86_BUILTIN_CMPUNORDPD,
28293 IX86_BUILTIN_CMPEQSD,
28294 IX86_BUILTIN_CMPLTSD,
28295 IX86_BUILTIN_CMPLESD,
28296 IX86_BUILTIN_CMPNEQSD,
28297 IX86_BUILTIN_CMPNLTSD,
28298 IX86_BUILTIN_CMPNLESD,
28299 IX86_BUILTIN_CMPORDSD,
28300 IX86_BUILTIN_CMPUNORDSD,
28302 IX86_BUILTIN_COMIEQSD,
28303 IX86_BUILTIN_COMILTSD,
28304 IX86_BUILTIN_COMILESD,
28305 IX86_BUILTIN_COMIGTSD,
28306 IX86_BUILTIN_COMIGESD,
28307 IX86_BUILTIN_COMINEQSD,
28308 IX86_BUILTIN_UCOMIEQSD,
28309 IX86_BUILTIN_UCOMILTSD,
28310 IX86_BUILTIN_UCOMILESD,
28311 IX86_BUILTIN_UCOMIGTSD,
28312 IX86_BUILTIN_UCOMIGESD,
28313 IX86_BUILTIN_UCOMINEQSD,
28315 IX86_BUILTIN_MAXPD,
28316 IX86_BUILTIN_MAXSD,
28317 IX86_BUILTIN_MINPD,
28318 IX86_BUILTIN_MINSD,
28320 IX86_BUILTIN_ANDPD,
28321 IX86_BUILTIN_ANDNPD,
28322 IX86_BUILTIN_ORPD,
28323 IX86_BUILTIN_XORPD,
28325 IX86_BUILTIN_SQRTPD,
28326 IX86_BUILTIN_SQRTSD,
28328 IX86_BUILTIN_UNPCKHPD,
28329 IX86_BUILTIN_UNPCKLPD,
28331 IX86_BUILTIN_SHUFPD,
28333 IX86_BUILTIN_LOADUPD,
28334 IX86_BUILTIN_STOREUPD,
28335 IX86_BUILTIN_MOVSD,
28337 IX86_BUILTIN_LOADHPD,
28338 IX86_BUILTIN_LOADLPD,
28340 IX86_BUILTIN_CVTDQ2PD,
28341 IX86_BUILTIN_CVTDQ2PS,
28343 IX86_BUILTIN_CVTPD2DQ,
28344 IX86_BUILTIN_CVTPD2PI,
28345 IX86_BUILTIN_CVTPD2PS,
28346 IX86_BUILTIN_CVTTPD2DQ,
28347 IX86_BUILTIN_CVTTPD2PI,
28349 IX86_BUILTIN_CVTPI2PD,
28350 IX86_BUILTIN_CVTSI2SD,
28351 IX86_BUILTIN_CVTSI642SD,
28353 IX86_BUILTIN_CVTSD2SI,
28354 IX86_BUILTIN_CVTSD2SI64,
28355 IX86_BUILTIN_CVTSD2SS,
28356 IX86_BUILTIN_CVTSS2SD,
28357 IX86_BUILTIN_CVTTSD2SI,
28358 IX86_BUILTIN_CVTTSD2SI64,
28360 IX86_BUILTIN_CVTPS2DQ,
28361 IX86_BUILTIN_CVTPS2PD,
28362 IX86_BUILTIN_CVTTPS2DQ,
28364 IX86_BUILTIN_MOVNTI,
28365 IX86_BUILTIN_MOVNTI64,
28366 IX86_BUILTIN_MOVNTPD,
28367 IX86_BUILTIN_MOVNTDQ,
28369 IX86_BUILTIN_MOVQ128,
28371 /* SSE2 MMX */
28372 IX86_BUILTIN_MASKMOVDQU,
28373 IX86_BUILTIN_MOVMSKPD,
28374 IX86_BUILTIN_PMOVMSKB128,
28376 IX86_BUILTIN_PACKSSWB128,
28377 IX86_BUILTIN_PACKSSDW128,
28378 IX86_BUILTIN_PACKUSWB128,
28380 IX86_BUILTIN_PADDB128,
28381 IX86_BUILTIN_PADDW128,
28382 IX86_BUILTIN_PADDD128,
28383 IX86_BUILTIN_PADDQ128,
28384 IX86_BUILTIN_PADDSB128,
28385 IX86_BUILTIN_PADDSW128,
28386 IX86_BUILTIN_PADDUSB128,
28387 IX86_BUILTIN_PADDUSW128,
28388 IX86_BUILTIN_PSUBB128,
28389 IX86_BUILTIN_PSUBW128,
28390 IX86_BUILTIN_PSUBD128,
28391 IX86_BUILTIN_PSUBQ128,
28392 IX86_BUILTIN_PSUBSB128,
28393 IX86_BUILTIN_PSUBSW128,
28394 IX86_BUILTIN_PSUBUSB128,
28395 IX86_BUILTIN_PSUBUSW128,
28397 IX86_BUILTIN_PAND128,
28398 IX86_BUILTIN_PANDN128,
28399 IX86_BUILTIN_POR128,
28400 IX86_BUILTIN_PXOR128,
28402 IX86_BUILTIN_PAVGB128,
28403 IX86_BUILTIN_PAVGW128,
28405 IX86_BUILTIN_PCMPEQB128,
28406 IX86_BUILTIN_PCMPEQW128,
28407 IX86_BUILTIN_PCMPEQD128,
28408 IX86_BUILTIN_PCMPGTB128,
28409 IX86_BUILTIN_PCMPGTW128,
28410 IX86_BUILTIN_PCMPGTD128,
28412 IX86_BUILTIN_PMADDWD128,
28414 IX86_BUILTIN_PMAXSW128,
28415 IX86_BUILTIN_PMAXUB128,
28416 IX86_BUILTIN_PMINSW128,
28417 IX86_BUILTIN_PMINUB128,
28419 IX86_BUILTIN_PMULUDQ,
28420 IX86_BUILTIN_PMULUDQ128,
28421 IX86_BUILTIN_PMULHUW128,
28422 IX86_BUILTIN_PMULHW128,
28423 IX86_BUILTIN_PMULLW128,
28425 IX86_BUILTIN_PSADBW128,
28426 IX86_BUILTIN_PSHUFHW,
28427 IX86_BUILTIN_PSHUFLW,
28428 IX86_BUILTIN_PSHUFD,
28430 IX86_BUILTIN_PSLLDQI128,
28431 IX86_BUILTIN_PSLLWI128,
28432 IX86_BUILTIN_PSLLDI128,
28433 IX86_BUILTIN_PSLLQI128,
28434 IX86_BUILTIN_PSRAWI128,
28435 IX86_BUILTIN_PSRADI128,
28436 IX86_BUILTIN_PSRLDQI128,
28437 IX86_BUILTIN_PSRLWI128,
28438 IX86_BUILTIN_PSRLDI128,
28439 IX86_BUILTIN_PSRLQI128,
28441 IX86_BUILTIN_PSLLDQ128,
28442 IX86_BUILTIN_PSLLW128,
28443 IX86_BUILTIN_PSLLD128,
28444 IX86_BUILTIN_PSLLQ128,
28445 IX86_BUILTIN_PSRAW128,
28446 IX86_BUILTIN_PSRAD128,
28447 IX86_BUILTIN_PSRLW128,
28448 IX86_BUILTIN_PSRLD128,
28449 IX86_BUILTIN_PSRLQ128,
28451 IX86_BUILTIN_PUNPCKHBW128,
28452 IX86_BUILTIN_PUNPCKHWD128,
28453 IX86_BUILTIN_PUNPCKHDQ128,
28454 IX86_BUILTIN_PUNPCKHQDQ128,
28455 IX86_BUILTIN_PUNPCKLBW128,
28456 IX86_BUILTIN_PUNPCKLWD128,
28457 IX86_BUILTIN_PUNPCKLDQ128,
28458 IX86_BUILTIN_PUNPCKLQDQ128,
28460 IX86_BUILTIN_CLFLUSH,
28461 IX86_BUILTIN_MFENCE,
28462 IX86_BUILTIN_LFENCE,
28463 IX86_BUILTIN_PAUSE,
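/* x87 FPU environment and status word (FNSTENV/FLDENV/FNSTSW/FNCLEX).  */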
28465 IX86_BUILTIN_FNSTENV,
28466 IX86_BUILTIN_FLDENV,
28467 IX86_BUILTIN_FNSTSW,
28468 IX86_BUILTIN_FNCLEX,
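/* Miscellaneous scalar builtins: bit scan, RDPMC, time-stamp counters
   and narrow rotates.  */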
28470 IX86_BUILTIN_BSRSI,
28471 IX86_BUILTIN_BSRDI,
28472 IX86_BUILTIN_RDPMC,
28473 IX86_BUILTIN_RDTSC,
28474 IX86_BUILTIN_RDTSCP,
28475 IX86_BUILTIN_ROLQI,
28476 IX86_BUILTIN_ROLHI,
28477 IX86_BUILTIN_RORQI,
28478 IX86_BUILTIN_RORHI,
28480 /* SSE3. */
28481 IX86_BUILTIN_ADDSUBPS,
28482 IX86_BUILTIN_HADDPS,
28483 IX86_BUILTIN_HSUBPS,
28484 IX86_BUILTIN_MOVSHDUP,
28485 IX86_BUILTIN_MOVSLDUP,
28486 IX86_BUILTIN_ADDSUBPD,
28487 IX86_BUILTIN_HADDPD,
28488 IX86_BUILTIN_HSUBPD,
28489 IX86_BUILTIN_LDDQU,
28491 IX86_BUILTIN_MONITOR,
28492 IX86_BUILTIN_MWAIT,
28494 /* SSSE3. */
28495 IX86_BUILTIN_PHADDW,
28496 IX86_BUILTIN_PHADDD,
28497 IX86_BUILTIN_PHADDSW,
28498 IX86_BUILTIN_PHSUBW,
28499 IX86_BUILTIN_PHSUBD,
28500 IX86_BUILTIN_PHSUBSW,
28501 IX86_BUILTIN_PMADDUBSW,
28502 IX86_BUILTIN_PMULHRSW,
28503 IX86_BUILTIN_PSHUFB,
28504 IX86_BUILTIN_PSIGNB,
28505 IX86_BUILTIN_PSIGNW,
28506 IX86_BUILTIN_PSIGND,
28507 IX86_BUILTIN_PALIGNR,
28508 IX86_BUILTIN_PABSB,
28509 IX86_BUILTIN_PABSW,
28510 IX86_BUILTIN_PABSD,
28512 IX86_BUILTIN_PHADDW128,
28513 IX86_BUILTIN_PHADDD128,
28514 IX86_BUILTIN_PHADDSW128,
28515 IX86_BUILTIN_PHSUBW128,
28516 IX86_BUILTIN_PHSUBD128,
28517 IX86_BUILTIN_PHSUBSW128,
28518 IX86_BUILTIN_PMADDUBSW128,
28519 IX86_BUILTIN_PMULHRSW128,
28520 IX86_BUILTIN_PSHUFB128,
28521 IX86_BUILTIN_PSIGNB128,
28522 IX86_BUILTIN_PSIGNW128,
28523 IX86_BUILTIN_PSIGND128,
28524 IX86_BUILTIN_PALIGNR128,
28525 IX86_BUILTIN_PABSB128,
28526 IX86_BUILTIN_PABSW128,
28527 IX86_BUILTIN_PABSD128,
28529 /* AMDFAM10 - SSE4A New Instructions. */
28530 IX86_BUILTIN_MOVNTSD,
28531 IX86_BUILTIN_MOVNTSS,
28532 IX86_BUILTIN_EXTRQI,
28533 IX86_BUILTIN_EXTRQ,
28534 IX86_BUILTIN_INSERTQI,
28535 IX86_BUILTIN_INSERTQ,
28537 /* SSE4.1. */
28538 IX86_BUILTIN_BLENDPD,
28539 IX86_BUILTIN_BLENDPS,
28540 IX86_BUILTIN_BLENDVPD,
28541 IX86_BUILTIN_BLENDVPS,
28542 IX86_BUILTIN_PBLENDVB128,
28543 IX86_BUILTIN_PBLENDW128,
28545 IX86_BUILTIN_DPPD,
28546 IX86_BUILTIN_DPPS,
28548 IX86_BUILTIN_INSERTPS128,
28550 IX86_BUILTIN_MOVNTDQA,
28551 IX86_BUILTIN_MPSADBW128,
28552 IX86_BUILTIN_PACKUSDW128,
28553 IX86_BUILTIN_PCMPEQQ,
28554 IX86_BUILTIN_PHMINPOSUW128,
28556 IX86_BUILTIN_PMAXSB128,
28557 IX86_BUILTIN_PMAXSD128,
28558 IX86_BUILTIN_PMAXUD128,
28559 IX86_BUILTIN_PMAXUW128,
28561 IX86_BUILTIN_PMINSB128,
28562 IX86_BUILTIN_PMINSD128,
28563 IX86_BUILTIN_PMINUD128,
28564 IX86_BUILTIN_PMINUW128,
28566 IX86_BUILTIN_PMOVSXBW128,
28567 IX86_BUILTIN_PMOVSXBD128,
28568 IX86_BUILTIN_PMOVSXBQ128,
28569 IX86_BUILTIN_PMOVSXWD128,
28570 IX86_BUILTIN_PMOVSXWQ128,
28571 IX86_BUILTIN_PMOVSXDQ128,
28573 IX86_BUILTIN_PMOVZXBW128,
28574 IX86_BUILTIN_PMOVZXBD128,
28575 IX86_BUILTIN_PMOVZXBQ128,
28576 IX86_BUILTIN_PMOVZXWD128,
28577 IX86_BUILTIN_PMOVZXWQ128,
28578 IX86_BUILTIN_PMOVZXDQ128,
28580 IX86_BUILTIN_PMULDQ128,
28581 IX86_BUILTIN_PMULLD128,
28583 IX86_BUILTIN_ROUNDSD,
28584 IX86_BUILTIN_ROUNDSS,
28586 IX86_BUILTIN_ROUNDPD,
28587 IX86_BUILTIN_ROUNDPS,
28589 IX86_BUILTIN_FLOORPD,
28590 IX86_BUILTIN_CEILPD,
28591 IX86_BUILTIN_TRUNCPD,
28592 IX86_BUILTIN_RINTPD,
28593 IX86_BUILTIN_ROUNDPD_AZ,
28595 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28596 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28597 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28599 IX86_BUILTIN_FLOORPS,
28600 IX86_BUILTIN_CEILPS,
28601 IX86_BUILTIN_TRUNCPS,
28602 IX86_BUILTIN_RINTPS,
28603 IX86_BUILTIN_ROUNDPS_AZ,
28605 IX86_BUILTIN_FLOORPS_SFIX,
28606 IX86_BUILTIN_CEILPS_SFIX,
28607 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28609 IX86_BUILTIN_PTESTZ,
28610 IX86_BUILTIN_PTESTC,
28611 IX86_BUILTIN_PTESTNZC,
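/* Generic vector element init/extract/set builtins.  */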
28613 IX86_BUILTIN_VEC_INIT_V2SI,
28614 IX86_BUILTIN_VEC_INIT_V4HI,
28615 IX86_BUILTIN_VEC_INIT_V8QI,
28616 IX86_BUILTIN_VEC_EXT_V2DF,
28617 IX86_BUILTIN_VEC_EXT_V2DI,
28618 IX86_BUILTIN_VEC_EXT_V4SF,
28619 IX86_BUILTIN_VEC_EXT_V4SI,
28620 IX86_BUILTIN_VEC_EXT_V8HI,
28621 IX86_BUILTIN_VEC_EXT_V2SI,
28622 IX86_BUILTIN_VEC_EXT_V4HI,
28623 IX86_BUILTIN_VEC_EXT_V16QI,
28624 IX86_BUILTIN_VEC_SET_V2DI,
28625 IX86_BUILTIN_VEC_SET_V4SF,
28626 IX86_BUILTIN_VEC_SET_V4SI,
28627 IX86_BUILTIN_VEC_SET_V8HI,
28628 IX86_BUILTIN_VEC_SET_V4HI,
28629 IX86_BUILTIN_VEC_SET_V16QI,
28631 IX86_BUILTIN_VEC_PACK_SFIX,
28632 IX86_BUILTIN_VEC_PACK_SFIX256,
28634 /* SSE4.2. */
28635 IX86_BUILTIN_CRC32QI,
28636 IX86_BUILTIN_CRC32HI,
28637 IX86_BUILTIN_CRC32SI,
28638 IX86_BUILTIN_CRC32DI,
28640 IX86_BUILTIN_PCMPESTRI128,
28641 IX86_BUILTIN_PCMPESTRM128,
28642 IX86_BUILTIN_PCMPESTRA128,
28643 IX86_BUILTIN_PCMPESTRC128,
28644 IX86_BUILTIN_PCMPESTRO128,
28645 IX86_BUILTIN_PCMPESTRS128,
28646 IX86_BUILTIN_PCMPESTRZ128,
28647 IX86_BUILTIN_PCMPISTRI128,
28648 IX86_BUILTIN_PCMPISTRM128,
28649 IX86_BUILTIN_PCMPISTRA128,
28650 IX86_BUILTIN_PCMPISTRC128,
28651 IX86_BUILTIN_PCMPISTRO128,
28652 IX86_BUILTIN_PCMPISTRS128,
28653 IX86_BUILTIN_PCMPISTRZ128,
28655 IX86_BUILTIN_PCMPGTQ,
28657 /* AES instructions */
28658 IX86_BUILTIN_AESENC128,
28659 IX86_BUILTIN_AESENCLAST128,
28660 IX86_BUILTIN_AESDEC128,
28661 IX86_BUILTIN_AESDECLAST128,
28662 IX86_BUILTIN_AESIMC128,
28663 IX86_BUILTIN_AESKEYGENASSIST128,
28665 /* PCLMUL instruction */
28666 IX86_BUILTIN_PCLMULQDQ128,
28668 /* AVX */
28669 IX86_BUILTIN_ADDPD256,
28670 IX86_BUILTIN_ADDPS256,
28671 IX86_BUILTIN_ADDSUBPD256,
28672 IX86_BUILTIN_ADDSUBPS256,
28673 IX86_BUILTIN_ANDPD256,
28674 IX86_BUILTIN_ANDPS256,
28675 IX86_BUILTIN_ANDNPD256,
28676 IX86_BUILTIN_ANDNPS256,
28677 IX86_BUILTIN_BLENDPD256,
28678 IX86_BUILTIN_BLENDPS256,
28679 IX86_BUILTIN_BLENDVPD256,
28680 IX86_BUILTIN_BLENDVPS256,
28681 IX86_BUILTIN_DIVPD256,
28682 IX86_BUILTIN_DIVPS256,
28683 IX86_BUILTIN_DPPS256,
28684 IX86_BUILTIN_HADDPD256,
28685 IX86_BUILTIN_HADDPS256,
28686 IX86_BUILTIN_HSUBPD256,
28687 IX86_BUILTIN_HSUBPS256,
28688 IX86_BUILTIN_MAXPD256,
28689 IX86_BUILTIN_MAXPS256,
28690 IX86_BUILTIN_MINPD256,
28691 IX86_BUILTIN_MINPS256,
28692 IX86_BUILTIN_MULPD256,
28693 IX86_BUILTIN_MULPS256,
28694 IX86_BUILTIN_ORPD256,
28695 IX86_BUILTIN_ORPS256,
28696 IX86_BUILTIN_SHUFPD256,
28697 IX86_BUILTIN_SHUFPS256,
28698 IX86_BUILTIN_SUBPD256,
28699 IX86_BUILTIN_SUBPS256,
28700 IX86_BUILTIN_XORPD256,
28701 IX86_BUILTIN_XORPS256,
28702 IX86_BUILTIN_CMPSD,
28703 IX86_BUILTIN_CMPSS,
28704 IX86_BUILTIN_CMPPD,
28705 IX86_BUILTIN_CMPPS,
28706 IX86_BUILTIN_CMPPD256,
28707 IX86_BUILTIN_CMPPS256,
28708 IX86_BUILTIN_CVTDQ2PD256,
28709 IX86_BUILTIN_CVTDQ2PS256,
28710 IX86_BUILTIN_CVTPD2PS256,
28711 IX86_BUILTIN_CVTPS2DQ256,
28712 IX86_BUILTIN_CVTPS2PD256,
28713 IX86_BUILTIN_CVTTPD2DQ256,
28714 IX86_BUILTIN_CVTPD2DQ256,
28715 IX86_BUILTIN_CVTTPS2DQ256,
28716 IX86_BUILTIN_EXTRACTF128PD256,
28717 IX86_BUILTIN_EXTRACTF128PS256,
28718 IX86_BUILTIN_EXTRACTF128SI256,
28719 IX86_BUILTIN_VZEROALL,
28720 IX86_BUILTIN_VZEROUPPER,
28721 IX86_BUILTIN_VPERMILVARPD,
28722 IX86_BUILTIN_VPERMILVARPS,
28723 IX86_BUILTIN_VPERMILVARPD256,
28724 IX86_BUILTIN_VPERMILVARPS256,
28725 IX86_BUILTIN_VPERMILPD,
28726 IX86_BUILTIN_VPERMILPS,
28727 IX86_BUILTIN_VPERMILPD256,
28728 IX86_BUILTIN_VPERMILPS256,
28729 IX86_BUILTIN_VPERMIL2PD,
28730 IX86_BUILTIN_VPERMIL2PS,
28731 IX86_BUILTIN_VPERMIL2PD256,
28732 IX86_BUILTIN_VPERMIL2PS256,
28733 IX86_BUILTIN_VPERM2F128PD256,
28734 IX86_BUILTIN_VPERM2F128PS256,
28735 IX86_BUILTIN_VPERM2F128SI256,
28736 IX86_BUILTIN_VBROADCASTSS,
28737 IX86_BUILTIN_VBROADCASTSD256,
28738 IX86_BUILTIN_VBROADCASTSS256,
28739 IX86_BUILTIN_VBROADCASTPD256,
28740 IX86_BUILTIN_VBROADCASTPS256,
28741 IX86_BUILTIN_VINSERTF128PD256,
28742 IX86_BUILTIN_VINSERTF128PS256,
28743 IX86_BUILTIN_VINSERTF128SI256,
28744 IX86_BUILTIN_LOADUPD256,
28745 IX86_BUILTIN_LOADUPS256,
28746 IX86_BUILTIN_STOREUPD256,
28747 IX86_BUILTIN_STOREUPS256,
28748 IX86_BUILTIN_LDDQU256,
28749 IX86_BUILTIN_MOVNTDQ256,
28750 IX86_BUILTIN_MOVNTPD256,
28751 IX86_BUILTIN_MOVNTPS256,
28752 IX86_BUILTIN_LOADDQU256,
28753 IX86_BUILTIN_STOREDQU256,
28754 IX86_BUILTIN_MASKLOADPD,
28755 IX86_BUILTIN_MASKLOADPS,
28756 IX86_BUILTIN_MASKSTOREPD,
28757 IX86_BUILTIN_MASKSTOREPS,
28758 IX86_BUILTIN_MASKLOADPD256,
28759 IX86_BUILTIN_MASKLOADPS256,
28760 IX86_BUILTIN_MASKSTOREPD256,
28761 IX86_BUILTIN_MASKSTOREPS256,
28762 IX86_BUILTIN_MOVSHDUP256,
28763 IX86_BUILTIN_MOVSLDUP256,
28764 IX86_BUILTIN_MOVDDUP256,
28766 IX86_BUILTIN_SQRTPD256,
28767 IX86_BUILTIN_SQRTPS256,
28768 IX86_BUILTIN_SQRTPS_NR256,
28769 IX86_BUILTIN_RSQRTPS256,
28770 IX86_BUILTIN_RSQRTPS_NR256,
28772 IX86_BUILTIN_RCPPS256,
28774 IX86_BUILTIN_ROUNDPD256,
28775 IX86_BUILTIN_ROUNDPS256,
28777 IX86_BUILTIN_FLOORPD256,
28778 IX86_BUILTIN_CEILPD256,
28779 IX86_BUILTIN_TRUNCPD256,
28780 IX86_BUILTIN_RINTPD256,
28781 IX86_BUILTIN_ROUNDPD_AZ256,
28783 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28784 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28785 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28787 IX86_BUILTIN_FLOORPS256,
28788 IX86_BUILTIN_CEILPS256,
28789 IX86_BUILTIN_TRUNCPS256,
28790 IX86_BUILTIN_RINTPS256,
28791 IX86_BUILTIN_ROUNDPS_AZ256,
28793 IX86_BUILTIN_FLOORPS_SFIX256,
28794 IX86_BUILTIN_CEILPS_SFIX256,
28795 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28797 IX86_BUILTIN_UNPCKHPD256,
28798 IX86_BUILTIN_UNPCKLPD256,
28799 IX86_BUILTIN_UNPCKHPS256,
28800 IX86_BUILTIN_UNPCKLPS256,
28802 IX86_BUILTIN_SI256_SI,
28803 IX86_BUILTIN_PS256_PS,
28804 IX86_BUILTIN_PD256_PD,
28805 IX86_BUILTIN_SI_SI256,
28806 IX86_BUILTIN_PS_PS256,
28807 IX86_BUILTIN_PD_PD256,
28809 IX86_BUILTIN_VTESTZPD,
28810 IX86_BUILTIN_VTESTCPD,
28811 IX86_BUILTIN_VTESTNZCPD,
28812 IX86_BUILTIN_VTESTZPS,
28813 IX86_BUILTIN_VTESTCPS,
28814 IX86_BUILTIN_VTESTNZCPS,
28815 IX86_BUILTIN_VTESTZPD256,
28816 IX86_BUILTIN_VTESTCPD256,
28817 IX86_BUILTIN_VTESTNZCPD256,
28818 IX86_BUILTIN_VTESTZPS256,
28819 IX86_BUILTIN_VTESTCPS256,
28820 IX86_BUILTIN_VTESTNZCPS256,
28821 IX86_BUILTIN_PTESTZ256,
28822 IX86_BUILTIN_PTESTC256,
28823 IX86_BUILTIN_PTESTNZC256,
28825 IX86_BUILTIN_MOVMSKPD256,
28826 IX86_BUILTIN_MOVMSKPS256,
28828 /* AVX2 */
28829 IX86_BUILTIN_MPSADBW256,
28830 IX86_BUILTIN_PABSB256,
28831 IX86_BUILTIN_PABSW256,
28832 IX86_BUILTIN_PABSD256,
28833 IX86_BUILTIN_PACKSSDW256,
28834 IX86_BUILTIN_PACKSSWB256,
28835 IX86_BUILTIN_PACKUSDW256,
28836 IX86_BUILTIN_PACKUSWB256,
28837 IX86_BUILTIN_PADDB256,
28838 IX86_BUILTIN_PADDW256,
28839 IX86_BUILTIN_PADDD256,
28840 IX86_BUILTIN_PADDQ256,
28841 IX86_BUILTIN_PADDSB256,
28842 IX86_BUILTIN_PADDSW256,
28843 IX86_BUILTIN_PADDUSB256,
28844 IX86_BUILTIN_PADDUSW256,
28845 IX86_BUILTIN_PALIGNR256,
28846 IX86_BUILTIN_AND256I,
28847 IX86_BUILTIN_ANDNOT256I,
28848 IX86_BUILTIN_PAVGB256,
28849 IX86_BUILTIN_PAVGW256,
28850 IX86_BUILTIN_PBLENDVB256,
28851 IX86_BUILTIN_PBLENDVW256,
28852 IX86_BUILTIN_PCMPEQB256,
28853 IX86_BUILTIN_PCMPEQW256,
28854 IX86_BUILTIN_PCMPEQD256,
28855 IX86_BUILTIN_PCMPEQQ256,
28856 IX86_BUILTIN_PCMPGTB256,
28857 IX86_BUILTIN_PCMPGTW256,
28858 IX86_BUILTIN_PCMPGTD256,
28859 IX86_BUILTIN_PCMPGTQ256,
28860 IX86_BUILTIN_PHADDW256,
28861 IX86_BUILTIN_PHADDD256,
28862 IX86_BUILTIN_PHADDSW256,
28863 IX86_BUILTIN_PHSUBW256,
28864 IX86_BUILTIN_PHSUBD256,
28865 IX86_BUILTIN_PHSUBSW256,
28866 IX86_BUILTIN_PMADDUBSW256,
28867 IX86_BUILTIN_PMADDWD256,
28868 IX86_BUILTIN_PMAXSB256,
28869 IX86_BUILTIN_PMAXSW256,
28870 IX86_BUILTIN_PMAXSD256,
28871 IX86_BUILTIN_PMAXUB256,
28872 IX86_BUILTIN_PMAXUW256,
28873 IX86_BUILTIN_PMAXUD256,
28874 IX86_BUILTIN_PMINSB256,
28875 IX86_BUILTIN_PMINSW256,
28876 IX86_BUILTIN_PMINSD256,
28877 IX86_BUILTIN_PMINUB256,
28878 IX86_BUILTIN_PMINUW256,
28879 IX86_BUILTIN_PMINUD256,
28880 IX86_BUILTIN_PMOVMSKB256,
28881 IX86_BUILTIN_PMOVSXBW256,
28882 IX86_BUILTIN_PMOVSXBD256,
28883 IX86_BUILTIN_PMOVSXBQ256,
28884 IX86_BUILTIN_PMOVSXWD256,
28885 IX86_BUILTIN_PMOVSXWQ256,
28886 IX86_BUILTIN_PMOVSXDQ256,
28887 IX86_BUILTIN_PMOVZXBW256,
28888 IX86_BUILTIN_PMOVZXBD256,
28889 IX86_BUILTIN_PMOVZXBQ256,
28890 IX86_BUILTIN_PMOVZXWD256,
28891 IX86_BUILTIN_PMOVZXWQ256,
28892 IX86_BUILTIN_PMOVZXDQ256,
28893 IX86_BUILTIN_PMULDQ256,
28894 IX86_BUILTIN_PMULHRSW256,
28895 IX86_BUILTIN_PMULHUW256,
28896 IX86_BUILTIN_PMULHW256,
28897 IX86_BUILTIN_PMULLW256,
28898 IX86_BUILTIN_PMULLD256,
28899 IX86_BUILTIN_PMULUDQ256,
28900 IX86_BUILTIN_POR256,
28901 IX86_BUILTIN_PSADBW256,
28902 IX86_BUILTIN_PSHUFB256,
28903 IX86_BUILTIN_PSHUFD256,
28904 IX86_BUILTIN_PSHUFHW256,
28905 IX86_BUILTIN_PSHUFLW256,
28906 IX86_BUILTIN_PSIGNB256,
28907 IX86_BUILTIN_PSIGNW256,
28908 IX86_BUILTIN_PSIGND256,
28909 IX86_BUILTIN_PSLLDQI256,
28910 IX86_BUILTIN_PSLLWI256,
28911 IX86_BUILTIN_PSLLW256,
28912 IX86_BUILTIN_PSLLDI256,
28913 IX86_BUILTIN_PSLLD256,
28914 IX86_BUILTIN_PSLLQI256,
28915 IX86_BUILTIN_PSLLQ256,
28916 IX86_BUILTIN_PSRAWI256,
28917 IX86_BUILTIN_PSRAW256,
28918 IX86_BUILTIN_PSRADI256,
28919 IX86_BUILTIN_PSRAD256,
28920 IX86_BUILTIN_PSRLDQI256,
28921 IX86_BUILTIN_PSRLWI256,
28922 IX86_BUILTIN_PSRLW256,
28923 IX86_BUILTIN_PSRLDI256,
28924 IX86_BUILTIN_PSRLD256,
28925 IX86_BUILTIN_PSRLQI256,
28926 IX86_BUILTIN_PSRLQ256,
28927 IX86_BUILTIN_PSUBB256,
28928 IX86_BUILTIN_PSUBW256,
28929 IX86_BUILTIN_PSUBD256,
28930 IX86_BUILTIN_PSUBQ256,
28931 IX86_BUILTIN_PSUBSB256,
28932 IX86_BUILTIN_PSUBSW256,
28933 IX86_BUILTIN_PSUBUSB256,
28934 IX86_BUILTIN_PSUBUSW256,
28935 IX86_BUILTIN_PUNPCKHBW256,
28936 IX86_BUILTIN_PUNPCKHWD256,
28937 IX86_BUILTIN_PUNPCKHDQ256,
28938 IX86_BUILTIN_PUNPCKHQDQ256,
28939 IX86_BUILTIN_PUNPCKLBW256,
28940 IX86_BUILTIN_PUNPCKLWD256,
28941 IX86_BUILTIN_PUNPCKLDQ256,
28942 IX86_BUILTIN_PUNPCKLQDQ256,
28943 IX86_BUILTIN_PXOR256,
28944 IX86_BUILTIN_MOVNTDQA256,
28945 IX86_BUILTIN_VBROADCASTSS_PS,
28946 IX86_BUILTIN_VBROADCASTSS_PS256,
28947 IX86_BUILTIN_VBROADCASTSD_PD256,
28948 IX86_BUILTIN_VBROADCASTSI256,
28949 IX86_BUILTIN_PBLENDD256,
28950 IX86_BUILTIN_PBLENDD128,
28951 IX86_BUILTIN_PBROADCASTB256,
28952 IX86_BUILTIN_PBROADCASTW256,
28953 IX86_BUILTIN_PBROADCASTD256,
28954 IX86_BUILTIN_PBROADCASTQ256,
28955 IX86_BUILTIN_PBROADCASTB128,
28956 IX86_BUILTIN_PBROADCASTW128,
28957 IX86_BUILTIN_PBROADCASTD128,
28958 IX86_BUILTIN_PBROADCASTQ128,
28959 IX86_BUILTIN_VPERMVARSI256,
28960 IX86_BUILTIN_VPERMDF256,
28961 IX86_BUILTIN_VPERMVARSF256,
28962 IX86_BUILTIN_VPERMDI256,
28963 IX86_BUILTIN_VPERMTI256,
28964 IX86_BUILTIN_VEXTRACT128I256,
28965 IX86_BUILTIN_VINSERT128I256,
28966 IX86_BUILTIN_MASKLOADD,
28967 IX86_BUILTIN_MASKLOADQ,
28968 IX86_BUILTIN_MASKLOADD256,
28969 IX86_BUILTIN_MASKLOADQ256,
28970 IX86_BUILTIN_MASKSTORED,
28971 IX86_BUILTIN_MASKSTOREQ,
28972 IX86_BUILTIN_MASKSTORED256,
28973 IX86_BUILTIN_MASKSTOREQ256,
28974 IX86_BUILTIN_PSLLVV4DI,
28975 IX86_BUILTIN_PSLLVV2DI,
28976 IX86_BUILTIN_PSLLVV8SI,
28977 IX86_BUILTIN_PSLLVV4SI,
28978 IX86_BUILTIN_PSRAVV8SI,
28979 IX86_BUILTIN_PSRAVV4SI,
28980 IX86_BUILTIN_PSRLVV4DI,
28981 IX86_BUILTIN_PSRLVV2DI,
28982 IX86_BUILTIN_PSRLVV8SI,
28983 IX86_BUILTIN_PSRLVV4SI,
28985 IX86_BUILTIN_GATHERSIV2DF,
28986 IX86_BUILTIN_GATHERSIV4DF,
28987 IX86_BUILTIN_GATHERDIV2DF,
28988 IX86_BUILTIN_GATHERDIV4DF,
28989 IX86_BUILTIN_GATHERSIV4SF,
28990 IX86_BUILTIN_GATHERSIV8SF,
28991 IX86_BUILTIN_GATHERDIV4SF,
28992 IX86_BUILTIN_GATHERDIV8SF,
28993 IX86_BUILTIN_GATHERSIV2DI,
28994 IX86_BUILTIN_GATHERSIV4DI,
28995 IX86_BUILTIN_GATHERDIV2DI,
28996 IX86_BUILTIN_GATHERDIV4DI,
28997 IX86_BUILTIN_GATHERSIV4SI,
28998 IX86_BUILTIN_GATHERSIV8SI,
28999 IX86_BUILTIN_GATHERDIV4SI,
29000 IX86_BUILTIN_GATHERDIV8SI,
29002 /* AVX512F */
29003 IX86_BUILTIN_SI512_SI256,
29004 IX86_BUILTIN_PD512_PD256,
29005 IX86_BUILTIN_PS512_PS256,
29006 IX86_BUILTIN_SI512_SI,
29007 IX86_BUILTIN_PD512_PD,
29008 IX86_BUILTIN_PS512_PS,
29009 IX86_BUILTIN_ADDPD512,
29010 IX86_BUILTIN_ADDPS512,
29011 IX86_BUILTIN_ADDSD_ROUND,
29012 IX86_BUILTIN_ADDSS_ROUND,
29013 IX86_BUILTIN_ALIGND512,
29014 IX86_BUILTIN_ALIGNQ512,
29015 IX86_BUILTIN_BLENDMD512,
29016 IX86_BUILTIN_BLENDMPD512,
29017 IX86_BUILTIN_BLENDMPS512,
29018 IX86_BUILTIN_BLENDMQ512,
29019 IX86_BUILTIN_BROADCASTF32X4_512,
29020 IX86_BUILTIN_BROADCASTF64X4_512,
29021 IX86_BUILTIN_BROADCASTI32X4_512,
29022 IX86_BUILTIN_BROADCASTI64X4_512,
29023 IX86_BUILTIN_BROADCASTSD512,
29024 IX86_BUILTIN_BROADCASTSS512,
29025 IX86_BUILTIN_CMPD512,
29026 IX86_BUILTIN_CMPPD512,
29027 IX86_BUILTIN_CMPPS512,
29028 IX86_BUILTIN_CMPQ512,
29029 IX86_BUILTIN_CMPSD_MASK,
29030 IX86_BUILTIN_CMPSS_MASK,
29031 IX86_BUILTIN_COMIDF,
29032 IX86_BUILTIN_COMISF,
29033 IX86_BUILTIN_COMPRESSPD512,
29034 IX86_BUILTIN_COMPRESSPDSTORE512,
29035 IX86_BUILTIN_COMPRESSPS512,
29036 IX86_BUILTIN_COMPRESSPSSTORE512,
29037 IX86_BUILTIN_CVTDQ2PD512,
29038 IX86_BUILTIN_CVTDQ2PS512,
29039 IX86_BUILTIN_CVTPD2DQ512,
29040 IX86_BUILTIN_CVTPD2PS512,
29041 IX86_BUILTIN_CVTPD2UDQ512,
29042 IX86_BUILTIN_CVTPH2PS512,
29043 IX86_BUILTIN_CVTPS2DQ512,
29044 IX86_BUILTIN_CVTPS2PD512,
29045 IX86_BUILTIN_CVTPS2PH512,
29046 IX86_BUILTIN_CVTPS2UDQ512,
29047 IX86_BUILTIN_CVTSD2SS_ROUND,
29048 IX86_BUILTIN_CVTSI2SD64,
29049 IX86_BUILTIN_CVTSI2SS32,
29050 IX86_BUILTIN_CVTSI2SS64,
29051 IX86_BUILTIN_CVTSS2SD_ROUND,
29052 IX86_BUILTIN_CVTTPD2DQ512,
29053 IX86_BUILTIN_CVTTPD2UDQ512,
29054 IX86_BUILTIN_CVTTPS2DQ512,
29055 IX86_BUILTIN_CVTTPS2UDQ512,
29056 IX86_BUILTIN_CVTUDQ2PD512,
29057 IX86_BUILTIN_CVTUDQ2PS512,
29058 IX86_BUILTIN_CVTUSI2SD32,
29059 IX86_BUILTIN_CVTUSI2SD64,
29060 IX86_BUILTIN_CVTUSI2SS32,
29061 IX86_BUILTIN_CVTUSI2SS64,
29062 IX86_BUILTIN_DIVPD512,
29063 IX86_BUILTIN_DIVPS512,
29064 IX86_BUILTIN_DIVSD_ROUND,
29065 IX86_BUILTIN_DIVSS_ROUND,
29066 IX86_BUILTIN_EXPANDPD512,
29067 IX86_BUILTIN_EXPANDPD512Z,
29068 IX86_BUILTIN_EXPANDPDLOAD512,
29069 IX86_BUILTIN_EXPANDPDLOAD512Z,
29070 IX86_BUILTIN_EXPANDPS512,
29071 IX86_BUILTIN_EXPANDPS512Z,
29072 IX86_BUILTIN_EXPANDPSLOAD512,
29073 IX86_BUILTIN_EXPANDPSLOAD512Z,
29074 IX86_BUILTIN_EXTRACTF32X4,
29075 IX86_BUILTIN_EXTRACTF64X4,
29076 IX86_BUILTIN_EXTRACTI32X4,
29077 IX86_BUILTIN_EXTRACTI64X4,
29078 IX86_BUILTIN_FIXUPIMMPD512_MASK,
29079 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
29080 IX86_BUILTIN_FIXUPIMMPS512_MASK,
29081 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
29082 IX86_BUILTIN_FIXUPIMMSD128_MASK,
29083 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
29084 IX86_BUILTIN_FIXUPIMMSS128_MASK,
29085 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
29086 IX86_BUILTIN_GETEXPPD512,
29087 IX86_BUILTIN_GETEXPPS512,
29088 IX86_BUILTIN_GETEXPSD128,
29089 IX86_BUILTIN_GETEXPSS128,
29090 IX86_BUILTIN_GETMANTPD512,
29091 IX86_BUILTIN_GETMANTPS512,
29092 IX86_BUILTIN_GETMANTSD128,
29093 IX86_BUILTIN_GETMANTSS128,
29094 IX86_BUILTIN_INSERTF32X4,
29095 IX86_BUILTIN_INSERTF64X4,
29096 IX86_BUILTIN_INSERTI32X4,
29097 IX86_BUILTIN_INSERTI64X4,
29098 IX86_BUILTIN_LOADAPD512,
29099 IX86_BUILTIN_LOADAPS512,
29100 IX86_BUILTIN_LOADDQUDI512,
29101 IX86_BUILTIN_LOADDQUSI512,
29102 IX86_BUILTIN_LOADUPD512,
29103 IX86_BUILTIN_LOADUPS512,
29104 IX86_BUILTIN_MAXPD512,
29105 IX86_BUILTIN_MAXPS512,
29106 IX86_BUILTIN_MAXSD_ROUND,
29107 IX86_BUILTIN_MAXSS_ROUND,
29108 IX86_BUILTIN_MINPD512,
29109 IX86_BUILTIN_MINPS512,
29110 IX86_BUILTIN_MINSD_ROUND,
29111 IX86_BUILTIN_MINSS_ROUND,
29112 IX86_BUILTIN_MOVAPD512,
29113 IX86_BUILTIN_MOVAPS512,
29114 IX86_BUILTIN_MOVDDUP512,
29115 IX86_BUILTIN_MOVDQA32LOAD512,
29116 IX86_BUILTIN_MOVDQA32STORE512,
29117 IX86_BUILTIN_MOVDQA32_512,
29118 IX86_BUILTIN_MOVDQA64LOAD512,
29119 IX86_BUILTIN_MOVDQA64STORE512,
29120 IX86_BUILTIN_MOVDQA64_512,
29121 IX86_BUILTIN_MOVNTDQ512,
29122 IX86_BUILTIN_MOVNTDQA512,
29123 IX86_BUILTIN_MOVNTPD512,
29124 IX86_BUILTIN_MOVNTPS512,
29125 IX86_BUILTIN_MOVSHDUP512,
29126 IX86_BUILTIN_MOVSLDUP512,
29127 IX86_BUILTIN_MULPD512,
29128 IX86_BUILTIN_MULPS512,
29129 IX86_BUILTIN_MULSD_ROUND,
29130 IX86_BUILTIN_MULSS_ROUND,
29131 IX86_BUILTIN_PABSD512,
29132 IX86_BUILTIN_PABSQ512,
29133 IX86_BUILTIN_PADDD512,
29134 IX86_BUILTIN_PADDQ512,
29135 IX86_BUILTIN_PANDD512,
29136 IX86_BUILTIN_PANDND512,
29137 IX86_BUILTIN_PANDNQ512,
29138 IX86_BUILTIN_PANDQ512,
29139 IX86_BUILTIN_PBROADCASTD512,
29140 IX86_BUILTIN_PBROADCASTD512_GPR,
29141 IX86_BUILTIN_PBROADCASTMB512,
29142 IX86_BUILTIN_PBROADCASTMW512,
29143 IX86_BUILTIN_PBROADCASTQ512,
29144 IX86_BUILTIN_PBROADCASTQ512_GPR,
29145 IX86_BUILTIN_PCMPEQD512_MASK,
29146 IX86_BUILTIN_PCMPEQQ512_MASK,
29147 IX86_BUILTIN_PCMPGTD512_MASK,
29148 IX86_BUILTIN_PCMPGTQ512_MASK,
29149 IX86_BUILTIN_PCOMPRESSD512,
29150 IX86_BUILTIN_PCOMPRESSDSTORE512,
29151 IX86_BUILTIN_PCOMPRESSQ512,
29152 IX86_BUILTIN_PCOMPRESSQSTORE512,
29153 IX86_BUILTIN_PEXPANDD512,
29154 IX86_BUILTIN_PEXPANDD512Z,
29155 IX86_BUILTIN_PEXPANDDLOAD512,
29156 IX86_BUILTIN_PEXPANDDLOAD512Z,
29157 IX86_BUILTIN_PEXPANDQ512,
29158 IX86_BUILTIN_PEXPANDQ512Z,
29159 IX86_BUILTIN_PEXPANDQLOAD512,
29160 IX86_BUILTIN_PEXPANDQLOAD512Z,
29161 IX86_BUILTIN_PMAXSD512,
29162 IX86_BUILTIN_PMAXSQ512,
29163 IX86_BUILTIN_PMAXUD512,
29164 IX86_BUILTIN_PMAXUQ512,
29165 IX86_BUILTIN_PMINSD512,
29166 IX86_BUILTIN_PMINSQ512,
29167 IX86_BUILTIN_PMINUD512,
29168 IX86_BUILTIN_PMINUQ512,
29169 IX86_BUILTIN_PMOVDB512,
29170 IX86_BUILTIN_PMOVDB512_MEM,
29171 IX86_BUILTIN_PMOVDW512,
29172 IX86_BUILTIN_PMOVDW512_MEM,
29173 IX86_BUILTIN_PMOVQB512,
29174 IX86_BUILTIN_PMOVQB512_MEM,
29175 IX86_BUILTIN_PMOVQD512,
29176 IX86_BUILTIN_PMOVQD512_MEM,
29177 IX86_BUILTIN_PMOVQW512,
29178 IX86_BUILTIN_PMOVQW512_MEM,
29179 IX86_BUILTIN_PMOVSDB512,
29180 IX86_BUILTIN_PMOVSDB512_MEM,
29181 IX86_BUILTIN_PMOVSDW512,
29182 IX86_BUILTIN_PMOVSDW512_MEM,
29183 IX86_BUILTIN_PMOVSQB512,
29184 IX86_BUILTIN_PMOVSQB512_MEM,
29185 IX86_BUILTIN_PMOVSQD512,
29186 IX86_BUILTIN_PMOVSQD512_MEM,
29187 IX86_BUILTIN_PMOVSQW512,
29188 IX86_BUILTIN_PMOVSQW512_MEM,
29189 IX86_BUILTIN_PMOVSXBD512,
29190 IX86_BUILTIN_PMOVSXBQ512,
29191 IX86_BUILTIN_PMOVSXDQ512,
29192 IX86_BUILTIN_PMOVSXWD512,
29193 IX86_BUILTIN_PMOVSXWQ512,
29194 IX86_BUILTIN_PMOVUSDB512,
29195 IX86_BUILTIN_PMOVUSDB512_MEM,
29196 IX86_BUILTIN_PMOVUSDW512,
29197 IX86_BUILTIN_PMOVUSDW512_MEM,
29198 IX86_BUILTIN_PMOVUSQB512,
29199 IX86_BUILTIN_PMOVUSQB512_MEM,
29200 IX86_BUILTIN_PMOVUSQD512,
29201 IX86_BUILTIN_PMOVUSQD512_MEM,
29202 IX86_BUILTIN_PMOVUSQW512,
29203 IX86_BUILTIN_PMOVUSQW512_MEM,
29204 IX86_BUILTIN_PMOVZXBD512,
29205 IX86_BUILTIN_PMOVZXBQ512,
29206 IX86_BUILTIN_PMOVZXDQ512,
29207 IX86_BUILTIN_PMOVZXWD512,
29208 IX86_BUILTIN_PMOVZXWQ512,
29209 IX86_BUILTIN_PMULDQ512,
29210 IX86_BUILTIN_PMULLD512,
29211 IX86_BUILTIN_PMULUDQ512,
29212 IX86_BUILTIN_PORD512,
29213 IX86_BUILTIN_PORQ512,
29214 IX86_BUILTIN_PROLD512,
29215 IX86_BUILTIN_PROLQ512,
29216 IX86_BUILTIN_PROLVD512,
29217 IX86_BUILTIN_PROLVQ512,
29218 IX86_BUILTIN_PRORD512,
29219 IX86_BUILTIN_PRORQ512,
29220 IX86_BUILTIN_PRORVD512,
29221 IX86_BUILTIN_PRORVQ512,
29222 IX86_BUILTIN_PSHUFD512,
29223 IX86_BUILTIN_PSLLD512,
29224 IX86_BUILTIN_PSLLDI512,
29225 IX86_BUILTIN_PSLLQ512,
29226 IX86_BUILTIN_PSLLQI512,
29227 IX86_BUILTIN_PSLLVV16SI,
29228 IX86_BUILTIN_PSLLVV8DI,
29229 IX86_BUILTIN_PSRAD512,
29230 IX86_BUILTIN_PSRADI512,
29231 IX86_BUILTIN_PSRAQ512,
29232 IX86_BUILTIN_PSRAQI512,
29233 IX86_BUILTIN_PSRAVV16SI,
29234 IX86_BUILTIN_PSRAVV8DI,
29235 IX86_BUILTIN_PSRLD512,
29236 IX86_BUILTIN_PSRLDI512,
29237 IX86_BUILTIN_PSRLQ512,
29238 IX86_BUILTIN_PSRLQI512,
29239 IX86_BUILTIN_PSRLVV16SI,
29240 IX86_BUILTIN_PSRLVV8DI,
29241 IX86_BUILTIN_PSUBD512,
29242 IX86_BUILTIN_PSUBQ512,
29243 IX86_BUILTIN_PTESTMD512,
29244 IX86_BUILTIN_PTESTMQ512,
29245 IX86_BUILTIN_PTESTNMD512,
29246 IX86_BUILTIN_PTESTNMQ512,
29247 IX86_BUILTIN_PUNPCKHDQ512,
29248 IX86_BUILTIN_PUNPCKHQDQ512,
29249 IX86_BUILTIN_PUNPCKLDQ512,
29250 IX86_BUILTIN_PUNPCKLQDQ512,
29251 IX86_BUILTIN_PXORD512,
29252 IX86_BUILTIN_PXORQ512,
29253 IX86_BUILTIN_RCP14PD512,
29254 IX86_BUILTIN_RCP14PS512,
29255 IX86_BUILTIN_RCP14SD,
29256 IX86_BUILTIN_RCP14SS,
29257 IX86_BUILTIN_RNDSCALEPD,
29258 IX86_BUILTIN_RNDSCALEPS,
29259 IX86_BUILTIN_RNDSCALESD,
29260 IX86_BUILTIN_RNDSCALESS,
29261 IX86_BUILTIN_RSQRT14PD512,
29262 IX86_BUILTIN_RSQRT14PS512,
29263 IX86_BUILTIN_RSQRT14SD,
29264 IX86_BUILTIN_RSQRT14SS,
29265 IX86_BUILTIN_SCALEFPD512,
29266 IX86_BUILTIN_SCALEFPS512,
29267 IX86_BUILTIN_SCALEFSD,
29268 IX86_BUILTIN_SCALEFSS,
29269 IX86_BUILTIN_SHUFPD512,
29270 IX86_BUILTIN_SHUFPS512,
29271 IX86_BUILTIN_SHUF_F32x4,
29272 IX86_BUILTIN_SHUF_F64x2,
29273 IX86_BUILTIN_SHUF_I32x4,
29274 IX86_BUILTIN_SHUF_I64x2,
29275 IX86_BUILTIN_SQRTPD512,
29276 IX86_BUILTIN_SQRTPD512_MASK,
29277 IX86_BUILTIN_SQRTPS512_MASK,
29278 IX86_BUILTIN_SQRTPS_NR512,
29279 IX86_BUILTIN_SQRTSD_ROUND,
29280 IX86_BUILTIN_SQRTSS_ROUND,
29281 IX86_BUILTIN_STOREAPD512,
29282 IX86_BUILTIN_STOREAPS512,
29283 IX86_BUILTIN_STOREDQUDI512,
29284 IX86_BUILTIN_STOREDQUSI512,
29285 IX86_BUILTIN_STOREUPD512,
29286 IX86_BUILTIN_STOREUPS512,
29287 IX86_BUILTIN_SUBPD512,
29288 IX86_BUILTIN_SUBPS512,
29289 IX86_BUILTIN_SUBSD_ROUND,
29290 IX86_BUILTIN_SUBSS_ROUND,
29291 IX86_BUILTIN_UCMPD512,
29292 IX86_BUILTIN_UCMPQ512,
29293 IX86_BUILTIN_UNPCKHPD512,
29294 IX86_BUILTIN_UNPCKHPS512,
29295 IX86_BUILTIN_UNPCKLPD512,
29296 IX86_BUILTIN_UNPCKLPS512,
29297 IX86_BUILTIN_VCVTSD2SI32,
29298 IX86_BUILTIN_VCVTSD2SI64,
29299 IX86_BUILTIN_VCVTSD2USI32,
29300 IX86_BUILTIN_VCVTSD2USI64,
29301 IX86_BUILTIN_VCVTSS2SI32,
29302 IX86_BUILTIN_VCVTSS2SI64,
29303 IX86_BUILTIN_VCVTSS2USI32,
29304 IX86_BUILTIN_VCVTSS2USI64,
29305 IX86_BUILTIN_VCVTTSD2SI32,
29306 IX86_BUILTIN_VCVTTSD2SI64,
29307 IX86_BUILTIN_VCVTTSD2USI32,
29308 IX86_BUILTIN_VCVTTSD2USI64,
29309 IX86_BUILTIN_VCVTTSS2SI32,
29310 IX86_BUILTIN_VCVTTSS2SI64,
29311 IX86_BUILTIN_VCVTTSS2USI32,
29312 IX86_BUILTIN_VCVTTSS2USI64,
29313 IX86_BUILTIN_VFMADDPD512_MASK,
29314 IX86_BUILTIN_VFMADDPD512_MASK3,
29315 IX86_BUILTIN_VFMADDPD512_MASKZ,
29316 IX86_BUILTIN_VFMADDPS512_MASK,
29317 IX86_BUILTIN_VFMADDPS512_MASK3,
29318 IX86_BUILTIN_VFMADDPS512_MASKZ,
29319 IX86_BUILTIN_VFMADDSD3_ROUND,
29320 IX86_BUILTIN_VFMADDSS3_ROUND,
29321 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29322 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29323 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29324 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29325 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29326 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29327 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29328 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29329 IX86_BUILTIN_VFMSUBPD512_MASK3,
29330 IX86_BUILTIN_VFMSUBPS512_MASK3,
29331 IX86_BUILTIN_VFMSUBSD3_MASK3,
29332 IX86_BUILTIN_VFMSUBSS3_MASK3,
29333 IX86_BUILTIN_VFNMADDPD512_MASK,
29334 IX86_BUILTIN_VFNMADDPS512_MASK,
29335 IX86_BUILTIN_VFNMSUBPD512_MASK,
29336 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29337 IX86_BUILTIN_VFNMSUBPS512_MASK,
29338 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29339 IX86_BUILTIN_VPCLZCNTD512,
29340 IX86_BUILTIN_VPCLZCNTQ512,
29341 IX86_BUILTIN_VPCONFLICTD512,
29342 IX86_BUILTIN_VPCONFLICTQ512,
29343 IX86_BUILTIN_VPERMDF512,
29344 IX86_BUILTIN_VPERMDI512,
29345 IX86_BUILTIN_VPERMI2VARD512,
29346 IX86_BUILTIN_VPERMI2VARPD512,
29347 IX86_BUILTIN_VPERMI2VARPS512,
29348 IX86_BUILTIN_VPERMI2VARQ512,
29349 IX86_BUILTIN_VPERMILPD512,
29350 IX86_BUILTIN_VPERMILPS512,
29351 IX86_BUILTIN_VPERMILVARPD512,
29352 IX86_BUILTIN_VPERMILVARPS512,
29353 IX86_BUILTIN_VPERMT2VARD512,
29354 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29355 IX86_BUILTIN_VPERMT2VARPD512,
29356 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29357 IX86_BUILTIN_VPERMT2VARPS512,
29358 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29359 IX86_BUILTIN_VPERMT2VARQ512,
29360 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29361 IX86_BUILTIN_VPERMVARDF512,
29362 IX86_BUILTIN_VPERMVARDI512,
29363 IX86_BUILTIN_VPERMVARSF512,
29364 IX86_BUILTIN_VPERMVARSI512,
29365 IX86_BUILTIN_VTERNLOGD512_MASK,
29366 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29367 IX86_BUILTIN_VTERNLOGQ512_MASK,
29368 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29370 /* Mask arithmetic operations */
29371 IX86_BUILTIN_KAND16,
29372 IX86_BUILTIN_KANDN16,
29373 IX86_BUILTIN_KNOT16,
29374 IX86_BUILTIN_KOR16,
29375 IX86_BUILTIN_KORTESTC16,
29376 IX86_BUILTIN_KORTESTZ16,
29377 IX86_BUILTIN_KUNPCKBW,
29378 IX86_BUILTIN_KXNOR16,
29379 IX86_BUILTIN_KXOR16,
29380 IX86_BUILTIN_KMOV16,
29382 /* AVX512VL. */
29383 IX86_BUILTIN_PMOVUSQD256_MEM,
29384 IX86_BUILTIN_PMOVUSQD128_MEM,
29385 IX86_BUILTIN_PMOVSQD256_MEM,
29386 IX86_BUILTIN_PMOVSQD128_MEM,
29387 IX86_BUILTIN_PMOVQD256_MEM,
29388 IX86_BUILTIN_PMOVQD128_MEM,
29389 IX86_BUILTIN_PMOVUSQW256_MEM,
29390 IX86_BUILTIN_PMOVUSQW128_MEM,
29391 IX86_BUILTIN_PMOVSQW256_MEM,
29392 IX86_BUILTIN_PMOVSQW128_MEM,
29393 IX86_BUILTIN_PMOVQW256_MEM,
29394 IX86_BUILTIN_PMOVQW128_MEM,
29395 IX86_BUILTIN_PMOVUSQB256_MEM,
29396 IX86_BUILTIN_PMOVUSQB128_MEM,
29397 IX86_BUILTIN_PMOVSQB256_MEM,
29398 IX86_BUILTIN_PMOVSQB128_MEM,
29399 IX86_BUILTIN_PMOVQB256_MEM,
29400 IX86_BUILTIN_PMOVQB128_MEM,
29401 IX86_BUILTIN_PMOVUSDW256_MEM,
29402 IX86_BUILTIN_PMOVUSDW128_MEM,
29403 IX86_BUILTIN_PMOVSDW256_MEM,
29404 IX86_BUILTIN_PMOVSDW128_MEM,
29405 IX86_BUILTIN_PMOVDW256_MEM,
29406 IX86_BUILTIN_PMOVDW128_MEM,
29407 IX86_BUILTIN_PMOVUSDB256_MEM,
29408 IX86_BUILTIN_PMOVUSDB128_MEM,
29409 IX86_BUILTIN_PMOVSDB256_MEM,
29410 IX86_BUILTIN_PMOVSDB128_MEM,
29411 IX86_BUILTIN_PMOVDB256_MEM,
29412 IX86_BUILTIN_PMOVDB128_MEM,
29413 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29414 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29415 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29416 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29417 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29418 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29419 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29420 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29421 IX86_BUILTIN_LOADAPD256_MASK,
29422 IX86_BUILTIN_LOADAPD128_MASK,
29423 IX86_BUILTIN_LOADAPS256_MASK,
29424 IX86_BUILTIN_LOADAPS128_MASK,
29425 IX86_BUILTIN_STOREAPD256_MASK,
29426 IX86_BUILTIN_STOREAPD128_MASK,
29427 IX86_BUILTIN_STOREAPS256_MASK,
29428 IX86_BUILTIN_STOREAPS128_MASK,
29429 IX86_BUILTIN_LOADUPD256_MASK,
29430 IX86_BUILTIN_LOADUPD128_MASK,
29431 IX86_BUILTIN_LOADUPS256_MASK,
29432 IX86_BUILTIN_LOADUPS128_MASK,
29433 IX86_BUILTIN_STOREUPD256_MASK,
29434 IX86_BUILTIN_STOREUPD128_MASK,
29435 IX86_BUILTIN_STOREUPS256_MASK,
29436 IX86_BUILTIN_STOREUPS128_MASK,
29437 IX86_BUILTIN_LOADDQUDI256_MASK,
29438 IX86_BUILTIN_LOADDQUDI128_MASK,
29439 IX86_BUILTIN_LOADDQUSI256_MASK,
29440 IX86_BUILTIN_LOADDQUSI128_MASK,
29441 IX86_BUILTIN_LOADDQUHI256_MASK,
29442 IX86_BUILTIN_LOADDQUHI128_MASK,
29443 IX86_BUILTIN_LOADDQUQI256_MASK,
29444 IX86_BUILTIN_LOADDQUQI128_MASK,
29445 IX86_BUILTIN_STOREDQUDI256_MASK,
29446 IX86_BUILTIN_STOREDQUDI128_MASK,
29447 IX86_BUILTIN_STOREDQUSI256_MASK,
29448 IX86_BUILTIN_STOREDQUSI128_MASK,
29449 IX86_BUILTIN_STOREDQUHI256_MASK,
29450 IX86_BUILTIN_STOREDQUHI128_MASK,
29451 IX86_BUILTIN_STOREDQUQI256_MASK,
29452 IX86_BUILTIN_STOREDQUQI128_MASK,
29453 IX86_BUILTIN_COMPRESSPDSTORE256,
29454 IX86_BUILTIN_COMPRESSPDSTORE128,
29455 IX86_BUILTIN_COMPRESSPSSTORE256,
29456 IX86_BUILTIN_COMPRESSPSSTORE128,
29457 IX86_BUILTIN_PCOMPRESSQSTORE256,
29458 IX86_BUILTIN_PCOMPRESSQSTORE128,
29459 IX86_BUILTIN_PCOMPRESSDSTORE256,
29460 IX86_BUILTIN_PCOMPRESSDSTORE128,
29461 IX86_BUILTIN_EXPANDPDLOAD256,
29462 IX86_BUILTIN_EXPANDPDLOAD128,
29463 IX86_BUILTIN_EXPANDPSLOAD256,
29464 IX86_BUILTIN_EXPANDPSLOAD128,
29465 IX86_BUILTIN_PEXPANDQLOAD256,
29466 IX86_BUILTIN_PEXPANDQLOAD128,
29467 IX86_BUILTIN_PEXPANDDLOAD256,
29468 IX86_BUILTIN_PEXPANDDLOAD128,
29469 IX86_BUILTIN_EXPANDPDLOAD256Z,
29470 IX86_BUILTIN_EXPANDPDLOAD128Z,
29471 IX86_BUILTIN_EXPANDPSLOAD256Z,
29472 IX86_BUILTIN_EXPANDPSLOAD128Z,
29473 IX86_BUILTIN_PEXPANDQLOAD256Z,
29474 IX86_BUILTIN_PEXPANDQLOAD128Z,
29475 IX86_BUILTIN_PEXPANDDLOAD256Z,
29476 IX86_BUILTIN_PEXPANDDLOAD128Z,
29477 IX86_BUILTIN_PALIGNR256_MASK,
29478 IX86_BUILTIN_PALIGNR128_MASK,
29479 IX86_BUILTIN_MOVDQA64_256_MASK,
29480 IX86_BUILTIN_MOVDQA64_128_MASK,
29481 IX86_BUILTIN_MOVDQA32_256_MASK,
29482 IX86_BUILTIN_MOVDQA32_128_MASK,
29483 IX86_BUILTIN_MOVAPD256_MASK,
29484 IX86_BUILTIN_MOVAPD128_MASK,
29485 IX86_BUILTIN_MOVAPS256_MASK,
29486 IX86_BUILTIN_MOVAPS128_MASK,
29487 IX86_BUILTIN_MOVDQUHI256_MASK,
29488 IX86_BUILTIN_MOVDQUHI128_MASK,
29489 IX86_BUILTIN_MOVDQUQI256_MASK,
29490 IX86_BUILTIN_MOVDQUQI128_MASK,
29491 IX86_BUILTIN_MINPS128_MASK,
29492 IX86_BUILTIN_MAXPS128_MASK,
29493 IX86_BUILTIN_MINPD128_MASK,
29494 IX86_BUILTIN_MAXPD128_MASK,
29495 IX86_BUILTIN_MAXPD256_MASK,
29496 IX86_BUILTIN_MAXPS256_MASK,
29497 IX86_BUILTIN_MINPD256_MASK,
29498 IX86_BUILTIN_MINPS256_MASK,
29499 IX86_BUILTIN_MULPS128_MASK,
29500 IX86_BUILTIN_DIVPS128_MASK,
29501 IX86_BUILTIN_MULPD128_MASK,
29502 IX86_BUILTIN_DIVPD128_MASK,
29503 IX86_BUILTIN_DIVPD256_MASK,
29504 IX86_BUILTIN_DIVPS256_MASK,
29505 IX86_BUILTIN_MULPD256_MASK,
29506 IX86_BUILTIN_MULPS256_MASK,
29507 IX86_BUILTIN_ADDPD128_MASK,
29508 IX86_BUILTIN_ADDPD256_MASK,
29509 IX86_BUILTIN_ADDPS128_MASK,
29510 IX86_BUILTIN_ADDPS256_MASK,
29511 IX86_BUILTIN_SUBPD128_MASK,
29512 IX86_BUILTIN_SUBPD256_MASK,
29513 IX86_BUILTIN_SUBPS128_MASK,
29514 IX86_BUILTIN_SUBPS256_MASK,
29515 IX86_BUILTIN_XORPD256_MASK,
29516 IX86_BUILTIN_XORPD128_MASK,
29517 IX86_BUILTIN_XORPS256_MASK,
29518 IX86_BUILTIN_XORPS128_MASK,
29519 IX86_BUILTIN_ORPD256_MASK,
29520 IX86_BUILTIN_ORPD128_MASK,
29521 IX86_BUILTIN_ORPS256_MASK,
29522 IX86_BUILTIN_ORPS128_MASK,
29523 IX86_BUILTIN_BROADCASTF32x2_256,
29524 IX86_BUILTIN_BROADCASTI32x2_256,
29525 IX86_BUILTIN_BROADCASTI32x2_128,
29526 IX86_BUILTIN_BROADCASTF64X2_256,
29527 IX86_BUILTIN_BROADCASTI64X2_256,
29528 IX86_BUILTIN_BROADCASTF32X4_256,
29529 IX86_BUILTIN_BROADCASTI32X4_256,
29530 IX86_BUILTIN_EXTRACTF32X4_256,
29531 IX86_BUILTIN_EXTRACTI32X4_256,
29532 IX86_BUILTIN_DBPSADBW256,
29533 IX86_BUILTIN_DBPSADBW128,
29534 IX86_BUILTIN_CVTTPD2QQ256,
29535 IX86_BUILTIN_CVTTPD2QQ128,
29536 IX86_BUILTIN_CVTTPD2UQQ256,
29537 IX86_BUILTIN_CVTTPD2UQQ128,
29538 IX86_BUILTIN_CVTPD2QQ256,
29539 IX86_BUILTIN_CVTPD2QQ128,
29540 IX86_BUILTIN_CVTPD2UQQ256,
29541 IX86_BUILTIN_CVTPD2UQQ128,
29542 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29543 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29544 IX86_BUILTIN_CVTTPS2QQ256,
29545 IX86_BUILTIN_CVTTPS2QQ128,
29546 IX86_BUILTIN_CVTTPS2UQQ256,
29547 IX86_BUILTIN_CVTTPS2UQQ128,
29548 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29549 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29550 IX86_BUILTIN_CVTTPS2UDQ256,
29551 IX86_BUILTIN_CVTTPS2UDQ128,
29552 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29553 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29554 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29555 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29556 IX86_BUILTIN_CVTPD2DQ256_MASK,
29557 IX86_BUILTIN_CVTPD2DQ128_MASK,
29558 IX86_BUILTIN_CVTDQ2PD256_MASK,
29559 IX86_BUILTIN_CVTDQ2PD128_MASK,
29560 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29561 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29562 IX86_BUILTIN_CVTDQ2PS256_MASK,
29563 IX86_BUILTIN_CVTDQ2PS128_MASK,
29564 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29565 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29566 IX86_BUILTIN_CVTPS2PD256_MASK,
29567 IX86_BUILTIN_CVTPS2PD128_MASK,
29568 IX86_BUILTIN_PBROADCASTB256_MASK,
29569 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29570 IX86_BUILTIN_PBROADCASTB128_MASK,
29571 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29572 IX86_BUILTIN_PBROADCASTW256_MASK,
29573 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29574 IX86_BUILTIN_PBROADCASTW128_MASK,
29575 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29576 IX86_BUILTIN_PBROADCASTD256_MASK,
29577 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29578 IX86_BUILTIN_PBROADCASTD128_MASK,
29579 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29580 IX86_BUILTIN_PBROADCASTQ256_MASK,
29581 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29582 IX86_BUILTIN_PBROADCASTQ128_MASK,
29583 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29584 IX86_BUILTIN_BROADCASTSS256,
29585 IX86_BUILTIN_BROADCASTSS128,
29586 IX86_BUILTIN_BROADCASTSD256,
29587 IX86_BUILTIN_EXTRACTF64X2_256,
29588 IX86_BUILTIN_EXTRACTI64X2_256,
29589 IX86_BUILTIN_INSERTF32X4_256,
29590 IX86_BUILTIN_INSERTI32X4_256,
29591 IX86_BUILTIN_PMOVSXBW256_MASK,
29592 IX86_BUILTIN_PMOVSXBW128_MASK,
29593 IX86_BUILTIN_PMOVSXBD256_MASK,
29594 IX86_BUILTIN_PMOVSXBD128_MASK,
29595 IX86_BUILTIN_PMOVSXBQ256_MASK,
29596 IX86_BUILTIN_PMOVSXBQ128_MASK,
29597 IX86_BUILTIN_PMOVSXWD256_MASK,
29598 IX86_BUILTIN_PMOVSXWD128_MASK,
29599 IX86_BUILTIN_PMOVSXWQ256_MASK,
29600 IX86_BUILTIN_PMOVSXWQ128_MASK,
29601 IX86_BUILTIN_PMOVSXDQ256_MASK,
29602 IX86_BUILTIN_PMOVSXDQ128_MASK,
29603 IX86_BUILTIN_PMOVZXBW256_MASK,
29604 IX86_BUILTIN_PMOVZXBW128_MASK,
29605 IX86_BUILTIN_PMOVZXBD256_MASK,
29606 IX86_BUILTIN_PMOVZXBD128_MASK,
29607 IX86_BUILTIN_PMOVZXBQ256_MASK,
29608 IX86_BUILTIN_PMOVZXBQ128_MASK,
29609 IX86_BUILTIN_PMOVZXWD256_MASK,
29610 IX86_BUILTIN_PMOVZXWD128_MASK,
29611 IX86_BUILTIN_PMOVZXWQ256_MASK,
29612 IX86_BUILTIN_PMOVZXWQ128_MASK,
29613 IX86_BUILTIN_PMOVZXDQ256_MASK,
29614 IX86_BUILTIN_PMOVZXDQ128_MASK,
29615 IX86_BUILTIN_REDUCEPD256_MASK,
29616 IX86_BUILTIN_REDUCEPD128_MASK,
29617 IX86_BUILTIN_REDUCEPS256_MASK,
29618 IX86_BUILTIN_REDUCEPS128_MASK,
29619 IX86_BUILTIN_REDUCESD_MASK,
29620 IX86_BUILTIN_REDUCESS_MASK,
29621 IX86_BUILTIN_VPERMVARHI256_MASK,
29622 IX86_BUILTIN_VPERMVARHI128_MASK,
29623 IX86_BUILTIN_VPERMT2VARHI256,
29624 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29625 IX86_BUILTIN_VPERMT2VARHI128,
29626 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29627 IX86_BUILTIN_VPERMI2VARHI256,
29628 IX86_BUILTIN_VPERMI2VARHI128,
29629 IX86_BUILTIN_RCP14PD256,
29630 IX86_BUILTIN_RCP14PD128,
29631 IX86_BUILTIN_RCP14PS256,
29632 IX86_BUILTIN_RCP14PS128,
29633 IX86_BUILTIN_RSQRT14PD256_MASK,
29634 IX86_BUILTIN_RSQRT14PD128_MASK,
29635 IX86_BUILTIN_RSQRT14PS256_MASK,
29636 IX86_BUILTIN_RSQRT14PS128_MASK,
29637 IX86_BUILTIN_SQRTPD256_MASK,
29638 IX86_BUILTIN_SQRTPD128_MASK,
29639 IX86_BUILTIN_SQRTPS256_MASK,
29640 IX86_BUILTIN_SQRTPS128_MASK,
29641 IX86_BUILTIN_PADDB128_MASK,
29642 IX86_BUILTIN_PADDW128_MASK,
29643 IX86_BUILTIN_PADDD128_MASK,
29644 IX86_BUILTIN_PADDQ128_MASK,
29645 IX86_BUILTIN_PSUBB128_MASK,
29646 IX86_BUILTIN_PSUBW128_MASK,
29647 IX86_BUILTIN_PSUBD128_MASK,
29648 IX86_BUILTIN_PSUBQ128_MASK,
29649 IX86_BUILTIN_PADDSB128_MASK,
29650 IX86_BUILTIN_PADDSW128_MASK,
29651 IX86_BUILTIN_PSUBSB128_MASK,
29652 IX86_BUILTIN_PSUBSW128_MASK,
29653 IX86_BUILTIN_PADDUSB128_MASK,
29654 IX86_BUILTIN_PADDUSW128_MASK,
29655 IX86_BUILTIN_PSUBUSB128_MASK,
29656 IX86_BUILTIN_PSUBUSW128_MASK,
29657 IX86_BUILTIN_PADDB256_MASK,
29658 IX86_BUILTIN_PADDW256_MASK,
29659 IX86_BUILTIN_PADDD256_MASK,
29660 IX86_BUILTIN_PADDQ256_MASK,
29661 IX86_BUILTIN_PADDSB256_MASK,
29662 IX86_BUILTIN_PADDSW256_MASK,
29663 IX86_BUILTIN_PADDUSB256_MASK,
29664 IX86_BUILTIN_PADDUSW256_MASK,
29665 IX86_BUILTIN_PSUBB256_MASK,
29666 IX86_BUILTIN_PSUBW256_MASK,
29667 IX86_BUILTIN_PSUBD256_MASK,
29668 IX86_BUILTIN_PSUBQ256_MASK,
29669 IX86_BUILTIN_PSUBSB256_MASK,
29670 IX86_BUILTIN_PSUBSW256_MASK,
29671 IX86_BUILTIN_PSUBUSB256_MASK,
29672 IX86_BUILTIN_PSUBUSW256_MASK,
29673 IX86_BUILTIN_SHUF_F64x2_256,
29674 IX86_BUILTIN_SHUF_I64x2_256,
29675 IX86_BUILTIN_SHUF_I32x4_256,
29676 IX86_BUILTIN_SHUF_F32x4_256,
29677 IX86_BUILTIN_PMOVWB128,
29678 IX86_BUILTIN_PMOVWB256,
29679 IX86_BUILTIN_PMOVSWB128,
29680 IX86_BUILTIN_PMOVSWB256,
29681 IX86_BUILTIN_PMOVUSWB128,
29682 IX86_BUILTIN_PMOVUSWB256,
29683 IX86_BUILTIN_PMOVDB128,
29684 IX86_BUILTIN_PMOVDB256,
29685 IX86_BUILTIN_PMOVSDB128,
29686 IX86_BUILTIN_PMOVSDB256,
29687 IX86_BUILTIN_PMOVUSDB128,
29688 IX86_BUILTIN_PMOVUSDB256,
29689 IX86_BUILTIN_PMOVDW128,
29690 IX86_BUILTIN_PMOVDW256,
29691 IX86_BUILTIN_PMOVSDW128,
29692 IX86_BUILTIN_PMOVSDW256,
29693 IX86_BUILTIN_PMOVUSDW128,
29694 IX86_BUILTIN_PMOVUSDW256,
29695 IX86_BUILTIN_PMOVQB128,
29696 IX86_BUILTIN_PMOVQB256,
29697 IX86_BUILTIN_PMOVSQB128,
29698 IX86_BUILTIN_PMOVSQB256,
29699 IX86_BUILTIN_PMOVUSQB128,
29700 IX86_BUILTIN_PMOVUSQB256,
29701 IX86_BUILTIN_PMOVQW128,
29702 IX86_BUILTIN_PMOVQW256,
29703 IX86_BUILTIN_PMOVSQW128,
29704 IX86_BUILTIN_PMOVSQW256,
29705 IX86_BUILTIN_PMOVUSQW128,
29706 IX86_BUILTIN_PMOVUSQW256,
29707 IX86_BUILTIN_PMOVQD128,
29708 IX86_BUILTIN_PMOVQD256,
29709 IX86_BUILTIN_PMOVSQD128,
29710 IX86_BUILTIN_PMOVSQD256,
29711 IX86_BUILTIN_PMOVUSQD128,
29712 IX86_BUILTIN_PMOVUSQD256,
29713 IX86_BUILTIN_RANGEPD256,
29714 IX86_BUILTIN_RANGEPD128,
29715 IX86_BUILTIN_RANGEPS256,
29716 IX86_BUILTIN_RANGEPS128,
29717 IX86_BUILTIN_GETEXPPS256,
29718 IX86_BUILTIN_GETEXPPD256,
29719 IX86_BUILTIN_GETEXPPS128,
29720 IX86_BUILTIN_GETEXPPD128,
29721 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29722 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29723 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29724 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29725 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29726 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29727 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29728 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29729 IX86_BUILTIN_PABSQ256,
29730 IX86_BUILTIN_PABSQ128,
29731 IX86_BUILTIN_PABSD256_MASK,
29732 IX86_BUILTIN_PABSD128_MASK,
29733 IX86_BUILTIN_PMULHRSW256_MASK,
29734 IX86_BUILTIN_PMULHRSW128_MASK,
29735 IX86_BUILTIN_PMULHUW128_MASK,
29736 IX86_BUILTIN_PMULHUW256_MASK,
29737 IX86_BUILTIN_PMULHW256_MASK,
29738 IX86_BUILTIN_PMULHW128_MASK,
29739 IX86_BUILTIN_PMULLW256_MASK,
29740 IX86_BUILTIN_PMULLW128_MASK,
29741 IX86_BUILTIN_PMULLQ256,
29742 IX86_BUILTIN_PMULLQ128,
29743 IX86_BUILTIN_ANDPD256_MASK,
29744 IX86_BUILTIN_ANDPD128_MASK,
29745 IX86_BUILTIN_ANDPS256_MASK,
29746 IX86_BUILTIN_ANDPS128_MASK,
29747 IX86_BUILTIN_ANDNPD256_MASK,
29748 IX86_BUILTIN_ANDNPD128_MASK,
29749 IX86_BUILTIN_ANDNPS256_MASK,
29750 IX86_BUILTIN_ANDNPS128_MASK,
29751 IX86_BUILTIN_PSLLWI128_MASK,
29752 IX86_BUILTIN_PSLLDI128_MASK,
29753 IX86_BUILTIN_PSLLQI128_MASK,
29754 IX86_BUILTIN_PSLLW128_MASK,
29755 IX86_BUILTIN_PSLLD128_MASK,
29756 IX86_BUILTIN_PSLLQ128_MASK,
29757 IX86_BUILTIN_PSLLWI256_MASK,
29758 IX86_BUILTIN_PSLLW256_MASK,
29759 IX86_BUILTIN_PSLLDI256_MASK,
29760 IX86_BUILTIN_PSLLD256_MASK,
29761 IX86_BUILTIN_PSLLQI256_MASK,
29762 IX86_BUILTIN_PSLLQ256_MASK,
29763 IX86_BUILTIN_PSRADI128_MASK,
29764 IX86_BUILTIN_PSRAD128_MASK,
29765 IX86_BUILTIN_PSRADI256_MASK,
29766 IX86_BUILTIN_PSRAD256_MASK,
29767 IX86_BUILTIN_PSRAQI128_MASK,
29768 IX86_BUILTIN_PSRAQ128_MASK,
29769 IX86_BUILTIN_PSRAQI256_MASK,
29770 IX86_BUILTIN_PSRAQ256_MASK,
29771 IX86_BUILTIN_PANDD256,
29772 IX86_BUILTIN_PANDD128,
29773 IX86_BUILTIN_PSRLDI128_MASK,
29774 IX86_BUILTIN_PSRLD128_MASK,
29775 IX86_BUILTIN_PSRLDI256_MASK,
29776 IX86_BUILTIN_PSRLD256_MASK,
29777 IX86_BUILTIN_PSRLQI128_MASK,
29778 IX86_BUILTIN_PSRLQ128_MASK,
29779 IX86_BUILTIN_PSRLQI256_MASK,
29780 IX86_BUILTIN_PSRLQ256_MASK,
29781 IX86_BUILTIN_PANDQ256,
29782 IX86_BUILTIN_PANDQ128,
29783 IX86_BUILTIN_PANDND256,
29784 IX86_BUILTIN_PANDND128,
29785 IX86_BUILTIN_PANDNQ256,
29786 IX86_BUILTIN_PANDNQ128,
29787 IX86_BUILTIN_PORD256,
29788 IX86_BUILTIN_PORD128,
29789 IX86_BUILTIN_PORQ256,
29790 IX86_BUILTIN_PORQ128,
29791 IX86_BUILTIN_PXORD256,
29792 IX86_BUILTIN_PXORD128,
29793 IX86_BUILTIN_PXORQ256,
29794 IX86_BUILTIN_PXORQ128,
29795 IX86_BUILTIN_PACKSSWB256_MASK,
29796 IX86_BUILTIN_PACKSSWB128_MASK,
29797 IX86_BUILTIN_PACKUSWB256_MASK,
29798 IX86_BUILTIN_PACKUSWB128_MASK,
29799 IX86_BUILTIN_RNDSCALEPS256,
29800 IX86_BUILTIN_RNDSCALEPD256,
29801 IX86_BUILTIN_RNDSCALEPS128,
29802 IX86_BUILTIN_RNDSCALEPD128,
29803 IX86_BUILTIN_VTERNLOGQ256_MASK,
29804 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29805 IX86_BUILTIN_VTERNLOGD256_MASK,
29806 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29807 IX86_BUILTIN_VTERNLOGQ128_MASK,
29808 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29809 IX86_BUILTIN_VTERNLOGD128_MASK,
29810 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29811 IX86_BUILTIN_SCALEFPD256,
29812 IX86_BUILTIN_SCALEFPS256,
29813 IX86_BUILTIN_SCALEFPD128,
29814 IX86_BUILTIN_SCALEFPS128,
29815 IX86_BUILTIN_VFMADDPD256_MASK,
29816 IX86_BUILTIN_VFMADDPD256_MASK3,
29817 IX86_BUILTIN_VFMADDPD256_MASKZ,
29818 IX86_BUILTIN_VFMADDPD128_MASK,
29819 IX86_BUILTIN_VFMADDPD128_MASK3,
29820 IX86_BUILTIN_VFMADDPD128_MASKZ,
29821 IX86_BUILTIN_VFMADDPS256_MASK,
29822 IX86_BUILTIN_VFMADDPS256_MASK3,
29823 IX86_BUILTIN_VFMADDPS256_MASKZ,
29824 IX86_BUILTIN_VFMADDPS128_MASK,
29825 IX86_BUILTIN_VFMADDPS128_MASK3,
29826 IX86_BUILTIN_VFMADDPS128_MASKZ,
29827 IX86_BUILTIN_VFMSUBPD256_MASK3,
29828 IX86_BUILTIN_VFMSUBPD128_MASK3,
29829 IX86_BUILTIN_VFMSUBPS256_MASK3,
29830 IX86_BUILTIN_VFMSUBPS128_MASK3,
29831 IX86_BUILTIN_VFNMADDPD256_MASK,
29832 IX86_BUILTIN_VFNMADDPD128_MASK,
29833 IX86_BUILTIN_VFNMADDPS256_MASK,
29834 IX86_BUILTIN_VFNMADDPS128_MASK,
29835 IX86_BUILTIN_VFNMSUBPD256_MASK,
29836 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29837 IX86_BUILTIN_VFNMSUBPD128_MASK,
29838 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29839 IX86_BUILTIN_VFNMSUBPS256_MASK,
29840 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29841 IX86_BUILTIN_VFNMSUBPS128_MASK,
29842 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29843 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29844 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29845 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29846 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29847 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29848 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29849 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29850 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29851 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29852 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29853 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29854 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29855 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29856 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29857 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29858 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29859 IX86_BUILTIN_INSERTF64X2_256,
29860 IX86_BUILTIN_INSERTI64X2_256,
29861 IX86_BUILTIN_PSRAVV16HI,
29862 IX86_BUILTIN_PSRAVV8HI,
29863 IX86_BUILTIN_PMADDUBSW256_MASK,
29864 IX86_BUILTIN_PMADDUBSW128_MASK,
29865 IX86_BUILTIN_PMADDWD256_MASK,
29866 IX86_BUILTIN_PMADDWD128_MASK,
29867 IX86_BUILTIN_PSRLVV16HI,
29868 IX86_BUILTIN_PSRLVV8HI,
29869 IX86_BUILTIN_CVTPS2DQ256_MASK,
29870 IX86_BUILTIN_CVTPS2DQ128_MASK,
29871 IX86_BUILTIN_CVTPS2UDQ256,
29872 IX86_BUILTIN_CVTPS2UDQ128,
29873 IX86_BUILTIN_CVTPS2QQ256,
29874 IX86_BUILTIN_CVTPS2QQ128,
29875 IX86_BUILTIN_CVTPS2UQQ256,
29876 IX86_BUILTIN_CVTPS2UQQ128,
29877 IX86_BUILTIN_GETMANTPS256,
29878 IX86_BUILTIN_GETMANTPS128,
29879 IX86_BUILTIN_GETMANTPD256,
29880 IX86_BUILTIN_GETMANTPD128,
29881 IX86_BUILTIN_MOVDDUP256_MASK,
29882 IX86_BUILTIN_MOVDDUP128_MASK,
29883 IX86_BUILTIN_MOVSHDUP256_MASK,
29884 IX86_BUILTIN_MOVSHDUP128_MASK,
29885 IX86_BUILTIN_MOVSLDUP256_MASK,
29886 IX86_BUILTIN_MOVSLDUP128_MASK,
29887 IX86_BUILTIN_CVTQQ2PS256,
29888 IX86_BUILTIN_CVTQQ2PS128,
29889 IX86_BUILTIN_CVTUQQ2PS256,
29890 IX86_BUILTIN_CVTUQQ2PS128,
29891 IX86_BUILTIN_CVTQQ2PD256,
29892 IX86_BUILTIN_CVTQQ2PD128,
29893 IX86_BUILTIN_CVTUQQ2PD256,
29894 IX86_BUILTIN_CVTUQQ2PD128,
29895 IX86_BUILTIN_VPERMT2VARQ256,
29896 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29897 IX86_BUILTIN_VPERMT2VARD256,
29898 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29899 IX86_BUILTIN_VPERMI2VARQ256,
29900 IX86_BUILTIN_VPERMI2VARD256,
29901 IX86_BUILTIN_VPERMT2VARPD256,
29902 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29903 IX86_BUILTIN_VPERMT2VARPS256,
29904 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29905 IX86_BUILTIN_VPERMI2VARPD256,
29906 IX86_BUILTIN_VPERMI2VARPS256,
29907 IX86_BUILTIN_VPERMT2VARQ128,
29908 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29909 IX86_BUILTIN_VPERMT2VARD128,
29910 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29911 IX86_BUILTIN_VPERMI2VARQ128,
29912 IX86_BUILTIN_VPERMI2VARD128,
29913 IX86_BUILTIN_VPERMT2VARPD128,
29914 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29915 IX86_BUILTIN_VPERMT2VARPS128,
29916 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29917 IX86_BUILTIN_VPERMI2VARPD128,
29918 IX86_BUILTIN_VPERMI2VARPS128,
29919 IX86_BUILTIN_PSHUFB256_MASK,
29920 IX86_BUILTIN_PSHUFB128_MASK,
29921 IX86_BUILTIN_PSHUFHW256_MASK,
29922 IX86_BUILTIN_PSHUFHW128_MASK,
29923 IX86_BUILTIN_PSHUFLW256_MASK,
29924 IX86_BUILTIN_PSHUFLW128_MASK,
29925 IX86_BUILTIN_PSHUFD256_MASK,
29926 IX86_BUILTIN_PSHUFD128_MASK,
29927 IX86_BUILTIN_SHUFPD256_MASK,
29928 IX86_BUILTIN_SHUFPD128_MASK,
29929 IX86_BUILTIN_SHUFPS256_MASK,
29930 IX86_BUILTIN_SHUFPS128_MASK,
29931 IX86_BUILTIN_PROLVQ256,
29932 IX86_BUILTIN_PROLVQ128,
29933 IX86_BUILTIN_PROLQ256,
29934 IX86_BUILTIN_PROLQ128,
29935 IX86_BUILTIN_PRORVQ256,
29936 IX86_BUILTIN_PRORVQ128,
29937 IX86_BUILTIN_PRORQ256,
29938 IX86_BUILTIN_PRORQ128,
29939 IX86_BUILTIN_PSRAVQ128,
29940 IX86_BUILTIN_PSRAVQ256,
29941 IX86_BUILTIN_PSLLVV4DI_MASK,
29942 IX86_BUILTIN_PSLLVV2DI_MASK,
29943 IX86_BUILTIN_PSLLVV8SI_MASK,
29944 IX86_BUILTIN_PSLLVV4SI_MASK,
29945 IX86_BUILTIN_PSRAVV8SI_MASK,
29946 IX86_BUILTIN_PSRAVV4SI_MASK,
29947 IX86_BUILTIN_PSRLVV4DI_MASK,
29948 IX86_BUILTIN_PSRLVV2DI_MASK,
29949 IX86_BUILTIN_PSRLVV8SI_MASK,
29950 IX86_BUILTIN_PSRLVV4SI_MASK,
29951 IX86_BUILTIN_PSRAWI256_MASK,
29952 IX86_BUILTIN_PSRAW256_MASK,
29953 IX86_BUILTIN_PSRAWI128_MASK,
29954 IX86_BUILTIN_PSRAW128_MASK,
29955 IX86_BUILTIN_PSRLWI256_MASK,
29956 IX86_BUILTIN_PSRLW256_MASK,
29957 IX86_BUILTIN_PSRLWI128_MASK,
29958 IX86_BUILTIN_PSRLW128_MASK,
29959 IX86_BUILTIN_PRORVD256,
29960 IX86_BUILTIN_PROLVD256,
29961 IX86_BUILTIN_PRORD256,
29962 IX86_BUILTIN_PROLD256,
29963 IX86_BUILTIN_PRORVD128,
29964 IX86_BUILTIN_PROLVD128,
29965 IX86_BUILTIN_PRORD128,
29966 IX86_BUILTIN_PROLD128,
29967 IX86_BUILTIN_FPCLASSPD256,
29968 IX86_BUILTIN_FPCLASSPD128,
29969 IX86_BUILTIN_FPCLASSSD,
29970 IX86_BUILTIN_FPCLASSPS256,
29971 IX86_BUILTIN_FPCLASSPS128,
29972 IX86_BUILTIN_FPCLASSSS,
29973 IX86_BUILTIN_CVTB2MASK128,
29974 IX86_BUILTIN_CVTB2MASK256,
29975 IX86_BUILTIN_CVTW2MASK128,
29976 IX86_BUILTIN_CVTW2MASK256,
29977 IX86_BUILTIN_CVTD2MASK128,
29978 IX86_BUILTIN_CVTD2MASK256,
29979 IX86_BUILTIN_CVTQ2MASK128,
29980 IX86_BUILTIN_CVTQ2MASK256,
29981 IX86_BUILTIN_CVTMASK2B128,
29982 IX86_BUILTIN_CVTMASK2B256,
29983 IX86_BUILTIN_CVTMASK2W128,
29984 IX86_BUILTIN_CVTMASK2W256,
29985 IX86_BUILTIN_CVTMASK2D128,
29986 IX86_BUILTIN_CVTMASK2D256,
29987 IX86_BUILTIN_CVTMASK2Q128,
29988 IX86_BUILTIN_CVTMASK2Q256,
29989 IX86_BUILTIN_PCMPEQB128_MASK,
29990 IX86_BUILTIN_PCMPEQB256_MASK,
29991 IX86_BUILTIN_PCMPEQW128_MASK,
29992 IX86_BUILTIN_PCMPEQW256_MASK,
29993 IX86_BUILTIN_PCMPEQD128_MASK,
29994 IX86_BUILTIN_PCMPEQD256_MASK,
29995 IX86_BUILTIN_PCMPEQQ128_MASK,
29996 IX86_BUILTIN_PCMPEQQ256_MASK,
29997 IX86_BUILTIN_PCMPGTB128_MASK,
29998 IX86_BUILTIN_PCMPGTB256_MASK,
29999 IX86_BUILTIN_PCMPGTW128_MASK,
30000 IX86_BUILTIN_PCMPGTW256_MASK,
30001 IX86_BUILTIN_PCMPGTD128_MASK,
30002 IX86_BUILTIN_PCMPGTD256_MASK,
30003 IX86_BUILTIN_PCMPGTQ128_MASK,
30004 IX86_BUILTIN_PCMPGTQ256_MASK,
30005 IX86_BUILTIN_PTESTMB128,
30006 IX86_BUILTIN_PTESTMB256,
30007 IX86_BUILTIN_PTESTMW128,
30008 IX86_BUILTIN_PTESTMW256,
30009 IX86_BUILTIN_PTESTMD128,
30010 IX86_BUILTIN_PTESTMD256,
30011 IX86_BUILTIN_PTESTMQ128,
30012 IX86_BUILTIN_PTESTMQ256,
30013 IX86_BUILTIN_PTESTNMB128,
30014 IX86_BUILTIN_PTESTNMB256,
30015 IX86_BUILTIN_PTESTNMW128,
30016 IX86_BUILTIN_PTESTNMW256,
30017 IX86_BUILTIN_PTESTNMD128,
30018 IX86_BUILTIN_PTESTNMD256,
30019 IX86_BUILTIN_PTESTNMQ128,
30020 IX86_BUILTIN_PTESTNMQ256,
30021 IX86_BUILTIN_PBROADCASTMB128,
30022 IX86_BUILTIN_PBROADCASTMB256,
30023 IX86_BUILTIN_PBROADCASTMW128,
30024 IX86_BUILTIN_PBROADCASTMW256,
30025 IX86_BUILTIN_COMPRESSPD256,
30026 IX86_BUILTIN_COMPRESSPD128,
30027 IX86_BUILTIN_COMPRESSPS256,
30028 IX86_BUILTIN_COMPRESSPS128,
30029 IX86_BUILTIN_PCOMPRESSQ256,
30030 IX86_BUILTIN_PCOMPRESSQ128,
30031 IX86_BUILTIN_PCOMPRESSD256,
30032 IX86_BUILTIN_PCOMPRESSD128,
30033 IX86_BUILTIN_EXPANDPD256,
30034 IX86_BUILTIN_EXPANDPD128,
30035 IX86_BUILTIN_EXPANDPS256,
30036 IX86_BUILTIN_EXPANDPS128,
30037 IX86_BUILTIN_PEXPANDQ256,
30038 IX86_BUILTIN_PEXPANDQ128,
30039 IX86_BUILTIN_PEXPANDD256,
30040 IX86_BUILTIN_PEXPANDD128,
30041 IX86_BUILTIN_EXPANDPD256Z,
30042 IX86_BUILTIN_EXPANDPD128Z,
30043 IX86_BUILTIN_EXPANDPS256Z,
30044 IX86_BUILTIN_EXPANDPS128Z,
30045 IX86_BUILTIN_PEXPANDQ256Z,
30046 IX86_BUILTIN_PEXPANDQ128Z,
30047 IX86_BUILTIN_PEXPANDD256Z,
30048 IX86_BUILTIN_PEXPANDD128Z,
30049 IX86_BUILTIN_PMAXSD256_MASK,
30050 IX86_BUILTIN_PMINSD256_MASK,
30051 IX86_BUILTIN_PMAXUD256_MASK,
30052 IX86_BUILTIN_PMINUD256_MASK,
30053 IX86_BUILTIN_PMAXSD128_MASK,
30054 IX86_BUILTIN_PMINSD128_MASK,
30055 IX86_BUILTIN_PMAXUD128_MASK,
30056 IX86_BUILTIN_PMINUD128_MASK,
30057 IX86_BUILTIN_PMAXSQ256_MASK,
30058 IX86_BUILTIN_PMINSQ256_MASK,
30059 IX86_BUILTIN_PMAXUQ256_MASK,
30060 IX86_BUILTIN_PMINUQ256_MASK,
30061 IX86_BUILTIN_PMAXSQ128_MASK,
30062 IX86_BUILTIN_PMINSQ128_MASK,
30063 IX86_BUILTIN_PMAXUQ128_MASK,
30064 IX86_BUILTIN_PMINUQ128_MASK,
30065 IX86_BUILTIN_PMINSB256_MASK,
30066 IX86_BUILTIN_PMINUB256_MASK,
30067 IX86_BUILTIN_PMAXSB256_MASK,
30068 IX86_BUILTIN_PMAXUB256_MASK,
30069 IX86_BUILTIN_PMINSB128_MASK,
30070 IX86_BUILTIN_PMINUB128_MASK,
30071 IX86_BUILTIN_PMAXSB128_MASK,
30072 IX86_BUILTIN_PMAXUB128_MASK,
30073 IX86_BUILTIN_PMINSW256_MASK,
30074 IX86_BUILTIN_PMINUW256_MASK,
30075 IX86_BUILTIN_PMAXSW256_MASK,
30076 IX86_BUILTIN_PMAXUW256_MASK,
30077 IX86_BUILTIN_PMINSW128_MASK,
30078 IX86_BUILTIN_PMINUW128_MASK,
30079 IX86_BUILTIN_PMAXSW128_MASK,
30080 IX86_BUILTIN_PMAXUW128_MASK,
30081 IX86_BUILTIN_VPCONFLICTQ256,
30082 IX86_BUILTIN_VPCONFLICTD256,
30083 IX86_BUILTIN_VPCLZCNTQ256,
30084 IX86_BUILTIN_VPCLZCNTD256,
30085 IX86_BUILTIN_UNPCKHPD256_MASK,
30086 IX86_BUILTIN_UNPCKHPD128_MASK,
30087 IX86_BUILTIN_UNPCKHPS256_MASK,
30088 IX86_BUILTIN_UNPCKHPS128_MASK,
30089 IX86_BUILTIN_UNPCKLPD256_MASK,
30090 IX86_BUILTIN_UNPCKLPD128_MASK,
30091 IX86_BUILTIN_UNPCKLPS256_MASK,
30092 IX86_BUILTIN_VPCONFLICTQ128,
30093 IX86_BUILTIN_VPCONFLICTD128,
30094 IX86_BUILTIN_VPCLZCNTQ128,
30095 IX86_BUILTIN_VPCLZCNTD128,
30096 IX86_BUILTIN_UNPCKLPS128_MASK,
30097 IX86_BUILTIN_ALIGND256,
30098 IX86_BUILTIN_ALIGNQ256,
30099 IX86_BUILTIN_ALIGND128,
30100 IX86_BUILTIN_ALIGNQ128,
30101 IX86_BUILTIN_CVTPS2PH256_MASK,
30102 IX86_BUILTIN_CVTPS2PH_MASK,
30103 IX86_BUILTIN_CVTPH2PS_MASK,
30104 IX86_BUILTIN_CVTPH2PS256_MASK,
30105 IX86_BUILTIN_PUNPCKHDQ128_MASK,
30106 IX86_BUILTIN_PUNPCKHDQ256_MASK,
30107 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
30108 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
30109 IX86_BUILTIN_PUNPCKLDQ128_MASK,
30110 IX86_BUILTIN_PUNPCKLDQ256_MASK,
30111 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
30112 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
30113 IX86_BUILTIN_PUNPCKHBW128_MASK,
30114 IX86_BUILTIN_PUNPCKHBW256_MASK,
30115 IX86_BUILTIN_PUNPCKHWD128_MASK,
30116 IX86_BUILTIN_PUNPCKHWD256_MASK,
30117 IX86_BUILTIN_PUNPCKLBW128_MASK,
30118 IX86_BUILTIN_PUNPCKLBW256_MASK,
30119 IX86_BUILTIN_PUNPCKLWD128_MASK,
30120 IX86_BUILTIN_PUNPCKLWD256_MASK,
30121 IX86_BUILTIN_PSLLVV16HI,
30122 IX86_BUILTIN_PSLLVV8HI,
30123 IX86_BUILTIN_PACKSSDW256_MASK,
30124 IX86_BUILTIN_PACKSSDW128_MASK,
30125 IX86_BUILTIN_PACKUSDW256_MASK,
30126 IX86_BUILTIN_PACKUSDW128_MASK,
30127 IX86_BUILTIN_PAVGB256_MASK,
30128 IX86_BUILTIN_PAVGW256_MASK,
30129 IX86_BUILTIN_PAVGB128_MASK,
30130 IX86_BUILTIN_PAVGW128_MASK,
30131 IX86_BUILTIN_VPERMVARSF256_MASK,
30132 IX86_BUILTIN_VPERMVARDF256_MASK,
30133 IX86_BUILTIN_VPERMDF256_MASK,
30134 IX86_BUILTIN_PABSB256_MASK,
30135 IX86_BUILTIN_PABSB128_MASK,
30136 IX86_BUILTIN_PABSW256_MASK,
30137 IX86_BUILTIN_PABSW128_MASK,
30138 IX86_BUILTIN_VPERMILVARPD_MASK,
30139 IX86_BUILTIN_VPERMILVARPS_MASK,
30140 IX86_BUILTIN_VPERMILVARPD256_MASK,
30141 IX86_BUILTIN_VPERMILVARPS256_MASK,
30142 IX86_BUILTIN_VPERMILPD_MASK,
30143 IX86_BUILTIN_VPERMILPS_MASK,
30144 IX86_BUILTIN_VPERMILPD256_MASK,
30145 IX86_BUILTIN_VPERMILPS256_MASK,
30146 IX86_BUILTIN_BLENDMQ256,
30147 IX86_BUILTIN_BLENDMD256,
30148 IX86_BUILTIN_BLENDMPD256,
30149 IX86_BUILTIN_BLENDMPS256,
30150 IX86_BUILTIN_BLENDMQ128,
30151 IX86_BUILTIN_BLENDMD128,
30152 IX86_BUILTIN_BLENDMPD128,
30153 IX86_BUILTIN_BLENDMPS128,
30154 IX86_BUILTIN_BLENDMW256,
30155 IX86_BUILTIN_BLENDMB256,
30156 IX86_BUILTIN_BLENDMW128,
30157 IX86_BUILTIN_BLENDMB128,
30158 IX86_BUILTIN_PMULLD256_MASK,
30159 IX86_BUILTIN_PMULLD128_MASK,
30160 IX86_BUILTIN_PMULUDQ256_MASK,
30161 IX86_BUILTIN_PMULDQ256_MASK,
30162 IX86_BUILTIN_PMULDQ128_MASK,
30163 IX86_BUILTIN_PMULUDQ128_MASK,
30164 IX86_BUILTIN_CVTPD2PS256_MASK,
30165 IX86_BUILTIN_CVTPD2PS_MASK,
30166 IX86_BUILTIN_VPERMVARSI256_MASK,
30167 IX86_BUILTIN_VPERMVARDI256_MASK,
30168 IX86_BUILTIN_VPERMDI256_MASK,
30169 IX86_BUILTIN_CMPQ256,
30170 IX86_BUILTIN_CMPD256,
30171 IX86_BUILTIN_UCMPQ256,
30172 IX86_BUILTIN_UCMPD256,
30173 IX86_BUILTIN_CMPB256,
30174 IX86_BUILTIN_CMPW256,
30175 IX86_BUILTIN_UCMPB256,
30176 IX86_BUILTIN_UCMPW256,
30177 IX86_BUILTIN_CMPPD256_MASK,
30178 IX86_BUILTIN_CMPPS256_MASK,
30179 IX86_BUILTIN_CMPQ128,
30180 IX86_BUILTIN_CMPD128,
30181 IX86_BUILTIN_UCMPQ128,
30182 IX86_BUILTIN_UCMPD128,
30183 IX86_BUILTIN_CMPB128,
30184 IX86_BUILTIN_CMPW128,
30185 IX86_BUILTIN_UCMPB128,
30186 IX86_BUILTIN_UCMPW128,
30187 IX86_BUILTIN_CMPPD128_MASK,
30188 IX86_BUILTIN_CMPPS128_MASK,
30190 IX86_BUILTIN_GATHER3SIV8SF,
30191 IX86_BUILTIN_GATHER3SIV4SF,
30192 IX86_BUILTIN_GATHER3SIV4DF,
30193 IX86_BUILTIN_GATHER3SIV2DF,
30194 IX86_BUILTIN_GATHER3DIV8SF,
30195 IX86_BUILTIN_GATHER3DIV4SF,
30196 IX86_BUILTIN_GATHER3DIV4DF,
30197 IX86_BUILTIN_GATHER3DIV2DF,
30198 IX86_BUILTIN_GATHER3SIV8SI,
30199 IX86_BUILTIN_GATHER3SIV4SI,
30200 IX86_BUILTIN_GATHER3SIV4DI,
30201 IX86_BUILTIN_GATHER3SIV2DI,
30202 IX86_BUILTIN_GATHER3DIV8SI,
30203 IX86_BUILTIN_GATHER3DIV4SI,
30204 IX86_BUILTIN_GATHER3DIV4DI,
30205 IX86_BUILTIN_GATHER3DIV2DI,
30206 IX86_BUILTIN_SCATTERSIV8SF,
30207 IX86_BUILTIN_SCATTERSIV4SF,
30208 IX86_BUILTIN_SCATTERSIV4DF,
30209 IX86_BUILTIN_SCATTERSIV2DF,
30210 IX86_BUILTIN_SCATTERDIV8SF,
30211 IX86_BUILTIN_SCATTERDIV4SF,
30212 IX86_BUILTIN_SCATTERDIV4DF,
30213 IX86_BUILTIN_SCATTERDIV2DF,
30214 IX86_BUILTIN_SCATTERSIV8SI,
30215 IX86_BUILTIN_SCATTERSIV4SI,
30216 IX86_BUILTIN_SCATTERSIV4DI,
30217 IX86_BUILTIN_SCATTERSIV2DI,
30218 IX86_BUILTIN_SCATTERDIV8SI,
30219 IX86_BUILTIN_SCATTERDIV4SI,
30220 IX86_BUILTIN_SCATTERDIV4DI,
30221 IX86_BUILTIN_SCATTERDIV2DI,
30223 /* AVX512DQ. */
30224 IX86_BUILTIN_RANGESD128,
30225 IX86_BUILTIN_RANGESS128,
30226 IX86_BUILTIN_KUNPCKWD,
30227 IX86_BUILTIN_KUNPCKDQ,
30228 IX86_BUILTIN_BROADCASTF32x2_512,
30229 IX86_BUILTIN_BROADCASTI32x2_512,
30230 IX86_BUILTIN_BROADCASTF64X2_512,
30231 IX86_BUILTIN_BROADCASTI64X2_512,
30232 IX86_BUILTIN_BROADCASTF32X8_512,
30233 IX86_BUILTIN_BROADCASTI32X8_512,
30234 IX86_BUILTIN_EXTRACTF64X2_512,
30235 IX86_BUILTIN_EXTRACTF32X8,
30236 IX86_BUILTIN_EXTRACTI64X2_512,
30237 IX86_BUILTIN_EXTRACTI32X8,
30238 IX86_BUILTIN_REDUCEPD512_MASK,
30239 IX86_BUILTIN_REDUCEPS512_MASK,
30240 IX86_BUILTIN_PMULLQ512,
30241 IX86_BUILTIN_XORPD512,
30242 IX86_BUILTIN_XORPS512,
30243 IX86_BUILTIN_ORPD512,
30244 IX86_BUILTIN_ORPS512,
30245 IX86_BUILTIN_ANDPD512,
30246 IX86_BUILTIN_ANDPS512,
30247 IX86_BUILTIN_ANDNPD512,
30248 IX86_BUILTIN_ANDNPS512,
30249 IX86_BUILTIN_INSERTF32X8,
30250 IX86_BUILTIN_INSERTI32X8,
30251 IX86_BUILTIN_INSERTF64X2_512,
30252 IX86_BUILTIN_INSERTI64X2_512,
30253 IX86_BUILTIN_FPCLASSPD512,
30254 IX86_BUILTIN_FPCLASSPS512,
30255 IX86_BUILTIN_CVTD2MASK512,
30256 IX86_BUILTIN_CVTQ2MASK512,
30257 IX86_BUILTIN_CVTMASK2D512,
30258 IX86_BUILTIN_CVTMASK2Q512,
30259 IX86_BUILTIN_CVTPD2QQ512,
30260 IX86_BUILTIN_CVTPS2QQ512,
30261 IX86_BUILTIN_CVTPD2UQQ512,
30262 IX86_BUILTIN_CVTPS2UQQ512,
30263 IX86_BUILTIN_CVTQQ2PS512,
30264 IX86_BUILTIN_CVTUQQ2PS512,
30265 IX86_BUILTIN_CVTQQ2PD512,
30266 IX86_BUILTIN_CVTUQQ2PD512,
30267 IX86_BUILTIN_CVTTPS2QQ512,
30268 IX86_BUILTIN_CVTTPS2UQQ512,
30269 IX86_BUILTIN_CVTTPD2QQ512,
30270 IX86_BUILTIN_CVTTPD2UQQ512,
30271 IX86_BUILTIN_RANGEPS512,
30272 IX86_BUILTIN_RANGEPD512,
30274 /* AVX512BW. */
30275 IX86_BUILTIN_PACKUSDW512,
30276 IX86_BUILTIN_PACKSSDW512,
30277 IX86_BUILTIN_LOADDQUHI512_MASK,
30278 IX86_BUILTIN_LOADDQUQI512_MASK,
30279 IX86_BUILTIN_PSLLDQ512,
30280 IX86_BUILTIN_PSRLDQ512,
30281 IX86_BUILTIN_STOREDQUHI512_MASK,
30282 IX86_BUILTIN_STOREDQUQI512_MASK,
30283 IX86_BUILTIN_PALIGNR512,
30284 IX86_BUILTIN_PALIGNR512_MASK,
30285 IX86_BUILTIN_MOVDQUHI512_MASK,
30286 IX86_BUILTIN_MOVDQUQI512_MASK,
30287 IX86_BUILTIN_PSADBW512,
30288 IX86_BUILTIN_DBPSADBW512,
30289 IX86_BUILTIN_PBROADCASTB512,
30290 IX86_BUILTIN_PBROADCASTB512_GPR,
30291 IX86_BUILTIN_PBROADCASTW512,
30292 IX86_BUILTIN_PBROADCASTW512_GPR,
30293 IX86_BUILTIN_PMOVSXBW512_MASK,
30294 IX86_BUILTIN_PMOVZXBW512_MASK,
30295 IX86_BUILTIN_VPERMVARHI512_MASK,
30296 IX86_BUILTIN_VPERMT2VARHI512,
30297 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30298 IX86_BUILTIN_VPERMI2VARHI512,
30299 IX86_BUILTIN_PAVGB512,
30300 IX86_BUILTIN_PAVGW512,
30301 IX86_BUILTIN_PADDB512,
30302 IX86_BUILTIN_PSUBB512,
30303 IX86_BUILTIN_PSUBSB512,
30304 IX86_BUILTIN_PADDSB512,
30305 IX86_BUILTIN_PSUBUSB512,
30306 IX86_BUILTIN_PADDUSB512,
30307 IX86_BUILTIN_PSUBW512,
30308 IX86_BUILTIN_PADDW512,
30309 IX86_BUILTIN_PSUBSW512,
30310 IX86_BUILTIN_PADDSW512,
30311 IX86_BUILTIN_PSUBUSW512,
30312 IX86_BUILTIN_PADDUSW512,
30313 IX86_BUILTIN_PMAXUW512,
30314 IX86_BUILTIN_PMAXSW512,
30315 IX86_BUILTIN_PMINUW512,
30316 IX86_BUILTIN_PMINSW512,
30317 IX86_BUILTIN_PMAXUB512,
30318 IX86_BUILTIN_PMAXSB512,
30319 IX86_BUILTIN_PMINUB512,
30320 IX86_BUILTIN_PMINSB512,
30321 IX86_BUILTIN_PMOVWB512,
30322 IX86_BUILTIN_PMOVSWB512,
30323 IX86_BUILTIN_PMOVUSWB512,
30324 IX86_BUILTIN_PMULHRSW512_MASK,
30325 IX86_BUILTIN_PMULHUW512_MASK,
30326 IX86_BUILTIN_PMULHW512_MASK,
30327 IX86_BUILTIN_PMULLW512_MASK,
30328 IX86_BUILTIN_PSLLWI512_MASK,
30329 IX86_BUILTIN_PSLLW512_MASK,
30330 IX86_BUILTIN_PACKSSWB512,
30331 IX86_BUILTIN_PACKUSWB512,
30332 IX86_BUILTIN_PSRAVV32HI,
30333 IX86_BUILTIN_PMADDUBSW512_MASK,
30334 IX86_BUILTIN_PMADDWD512_MASK,
30335 IX86_BUILTIN_PSRLVV32HI,
30336 IX86_BUILTIN_PUNPCKHBW512,
30337 IX86_BUILTIN_PUNPCKHWD512,
30338 IX86_BUILTIN_PUNPCKLBW512,
30339 IX86_BUILTIN_PUNPCKLWD512,
30340 IX86_BUILTIN_PSHUFB512,
30341 IX86_BUILTIN_PSHUFHW512,
30342 IX86_BUILTIN_PSHUFLW512,
30343 IX86_BUILTIN_PSRAWI512,
30344 IX86_BUILTIN_PSRAW512,
30345 IX86_BUILTIN_PSRLWI512,
30346 IX86_BUILTIN_PSRLW512,
30347 IX86_BUILTIN_CVTB2MASK512,
30348 IX86_BUILTIN_CVTW2MASK512,
30349 IX86_BUILTIN_CVTMASK2B512,
30350 IX86_BUILTIN_CVTMASK2W512,
30351 IX86_BUILTIN_PCMPEQB512_MASK,
30352 IX86_BUILTIN_PCMPEQW512_MASK,
30353 IX86_BUILTIN_PCMPGTB512_MASK,
30354 IX86_BUILTIN_PCMPGTW512_MASK,
30355 IX86_BUILTIN_PTESTMB512,
30356 IX86_BUILTIN_PTESTMW512,
30357 IX86_BUILTIN_PTESTNMB512,
30358 IX86_BUILTIN_PTESTNMW512,
30359 IX86_BUILTIN_PSLLVV32HI,
30360 IX86_BUILTIN_PABSB512,
30361 IX86_BUILTIN_PABSW512,
30362 IX86_BUILTIN_BLENDMW512,
30363 IX86_BUILTIN_BLENDMB512,
30364 IX86_BUILTIN_CMPB512,
30365 IX86_BUILTIN_CMPW512,
30366 IX86_BUILTIN_UCMPB512,
30367 IX86_BUILTIN_UCMPW512,
30369   /* Alternate 4- and 8-element gather/scatter for the vectorizer,
30370      where all operands are 32-byte or 64-byte wide, respectively.  */
30371 IX86_BUILTIN_GATHERALTSIV4DF,
30372 IX86_BUILTIN_GATHERALTDIV8SF,
30373 IX86_BUILTIN_GATHERALTSIV4DI,
30374 IX86_BUILTIN_GATHERALTDIV8SI,
30375 IX86_BUILTIN_GATHER3ALTDIV16SF,
30376 IX86_BUILTIN_GATHER3ALTDIV16SI,
30377 IX86_BUILTIN_GATHER3ALTSIV4DF,
30378 IX86_BUILTIN_GATHER3ALTDIV8SF,
30379 IX86_BUILTIN_GATHER3ALTSIV4DI,
30380 IX86_BUILTIN_GATHER3ALTDIV8SI,
30381 IX86_BUILTIN_GATHER3ALTSIV8DF,
30382 IX86_BUILTIN_GATHER3ALTSIV8DI,
30383 IX86_BUILTIN_GATHER3DIV16SF,
30384 IX86_BUILTIN_GATHER3DIV16SI,
30385 IX86_BUILTIN_GATHER3DIV8DF,
30386 IX86_BUILTIN_GATHER3DIV8DI,
30387 IX86_BUILTIN_GATHER3SIV16SF,
30388 IX86_BUILTIN_GATHER3SIV16SI,
30389 IX86_BUILTIN_GATHER3SIV8DF,
30390 IX86_BUILTIN_GATHER3SIV8DI,
30391 IX86_BUILTIN_SCATTERALTSIV8DF,
30392 IX86_BUILTIN_SCATTERALTDIV16SF,
30393 IX86_BUILTIN_SCATTERALTSIV8DI,
30394 IX86_BUILTIN_SCATTERALTDIV16SI,
30395 IX86_BUILTIN_SCATTERDIV16SF,
30396 IX86_BUILTIN_SCATTERDIV16SI,
30397 IX86_BUILTIN_SCATTERDIV8DF,
30398 IX86_BUILTIN_SCATTERDIV8DI,
30399 IX86_BUILTIN_SCATTERSIV16SF,
30400 IX86_BUILTIN_SCATTERSIV16SI,
30401 IX86_BUILTIN_SCATTERSIV8DF,
30402 IX86_BUILTIN_SCATTERSIV8DI,
30404 /* AVX512PF */
30405 IX86_BUILTIN_GATHERPFQPD,
30406 IX86_BUILTIN_GATHERPFDPS,
30407 IX86_BUILTIN_GATHERPFDPD,
30408 IX86_BUILTIN_GATHERPFQPS,
30409 IX86_BUILTIN_SCATTERPFDPD,
30410 IX86_BUILTIN_SCATTERPFDPS,
30411 IX86_BUILTIN_SCATTERPFQPD,
30412 IX86_BUILTIN_SCATTERPFQPS,
30414 /* AVX-512ER */
30415 IX86_BUILTIN_EXP2PD_MASK,
30416 IX86_BUILTIN_EXP2PS_MASK,
30417 IX86_BUILTIN_EXP2PS,
30418 IX86_BUILTIN_RCP28PD,
30419 IX86_BUILTIN_RCP28PS,
30420 IX86_BUILTIN_RCP28SD,
30421 IX86_BUILTIN_RCP28SS,
30422 IX86_BUILTIN_RSQRT28PD,
30423 IX86_BUILTIN_RSQRT28PS,
30424 IX86_BUILTIN_RSQRT28SD,
30425 IX86_BUILTIN_RSQRT28SS,
30427 /* AVX-512IFMA */
30428 IX86_BUILTIN_VPMADD52LUQ512,
30429 IX86_BUILTIN_VPMADD52HUQ512,
30430 IX86_BUILTIN_VPMADD52LUQ256,
30431 IX86_BUILTIN_VPMADD52HUQ256,
30432 IX86_BUILTIN_VPMADD52LUQ128,
30433 IX86_BUILTIN_VPMADD52HUQ128,
30434 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30435 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30436 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30437 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30438 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30439 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30441 /* AVX-512VBMI */
30442 IX86_BUILTIN_VPMULTISHIFTQB512,
30443 IX86_BUILTIN_VPMULTISHIFTQB256,
30444 IX86_BUILTIN_VPMULTISHIFTQB128,
30445 IX86_BUILTIN_VPERMVARQI512_MASK,
30446 IX86_BUILTIN_VPERMT2VARQI512,
30447 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30448 IX86_BUILTIN_VPERMI2VARQI512,
30449 IX86_BUILTIN_VPERMVARQI256_MASK,
30450 IX86_BUILTIN_VPERMVARQI128_MASK,
30451 IX86_BUILTIN_VPERMT2VARQI256,
30452 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30453 IX86_BUILTIN_VPERMT2VARQI128,
30454 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30455 IX86_BUILTIN_VPERMI2VARQI256,
30456 IX86_BUILTIN_VPERMI2VARQI128,
30458 /* SHA builtins. */
30459 IX86_BUILTIN_SHA1MSG1,
30460 IX86_BUILTIN_SHA1MSG2,
30461 IX86_BUILTIN_SHA1NEXTE,
30462 IX86_BUILTIN_SHA1RNDS4,
30463 IX86_BUILTIN_SHA256MSG1,
30464 IX86_BUILTIN_SHA256MSG2,
30465 IX86_BUILTIN_SHA256RNDS2,
30467 /* CLWB instructions. */
30468 IX86_BUILTIN_CLWB,
30470 /* PCOMMIT instructions. */
30471 IX86_BUILTIN_PCOMMIT,
30473 /* CLFLUSHOPT instructions. */
30474 IX86_BUILTIN_CLFLUSHOPT,
30476 /* TFmode support builtins. */
30477 IX86_BUILTIN_INFQ,
30478 IX86_BUILTIN_HUGE_VALQ,
30479 IX86_BUILTIN_FABSQ,
30480 IX86_BUILTIN_COPYSIGNQ,
30482 /* Vectorizer support builtins. */
30483 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30484 IX86_BUILTIN_CPYSGNPS,
30485 IX86_BUILTIN_CPYSGNPD,
30486 IX86_BUILTIN_CPYSGNPS256,
30487 IX86_BUILTIN_CPYSGNPS512,
30488 IX86_BUILTIN_CPYSGNPD256,
30489 IX86_BUILTIN_CPYSGNPD512,
30490 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30491 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30494 /* FMA4 instructions. */
30495 IX86_BUILTIN_VFMADDSS,
30496 IX86_BUILTIN_VFMADDSD,
30497 IX86_BUILTIN_VFMADDPS,
30498 IX86_BUILTIN_VFMADDPD,
30499 IX86_BUILTIN_VFMADDPS256,
30500 IX86_BUILTIN_VFMADDPD256,
30501 IX86_BUILTIN_VFMADDSUBPS,
30502 IX86_BUILTIN_VFMADDSUBPD,
30503 IX86_BUILTIN_VFMADDSUBPS256,
30504 IX86_BUILTIN_VFMADDSUBPD256,
30506 /* FMA3 instructions. */
30507 IX86_BUILTIN_VFMADDSS3,
30508 IX86_BUILTIN_VFMADDSD3,
30510 /* XOP instructions. */
30511 IX86_BUILTIN_VPCMOV,
30512 IX86_BUILTIN_VPCMOV_V2DI,
30513 IX86_BUILTIN_VPCMOV_V4SI,
30514 IX86_BUILTIN_VPCMOV_V8HI,
30515 IX86_BUILTIN_VPCMOV_V16QI,
30516 IX86_BUILTIN_VPCMOV_V4SF,
30517 IX86_BUILTIN_VPCMOV_V2DF,
30518 IX86_BUILTIN_VPCMOV256,
30519 IX86_BUILTIN_VPCMOV_V4DI256,
30520 IX86_BUILTIN_VPCMOV_V8SI256,
30521 IX86_BUILTIN_VPCMOV_V16HI256,
30522 IX86_BUILTIN_VPCMOV_V32QI256,
30523 IX86_BUILTIN_VPCMOV_V8SF256,
30524 IX86_BUILTIN_VPCMOV_V4DF256,
30526 IX86_BUILTIN_VPPERM,
30528 IX86_BUILTIN_VPMACSSWW,
30529 IX86_BUILTIN_VPMACSWW,
30530 IX86_BUILTIN_VPMACSSWD,
30531 IX86_BUILTIN_VPMACSWD,
30532 IX86_BUILTIN_VPMACSSDD,
30533 IX86_BUILTIN_VPMACSDD,
30534 IX86_BUILTIN_VPMACSSDQL,
30535 IX86_BUILTIN_VPMACSSDQH,
30536 IX86_BUILTIN_VPMACSDQL,
30537 IX86_BUILTIN_VPMACSDQH,
30538 IX86_BUILTIN_VPMADCSSWD,
30539 IX86_BUILTIN_VPMADCSWD,
30541 IX86_BUILTIN_VPHADDBW,
30542 IX86_BUILTIN_VPHADDBD,
30543 IX86_BUILTIN_VPHADDBQ,
30544 IX86_BUILTIN_VPHADDWD,
30545 IX86_BUILTIN_VPHADDWQ,
30546 IX86_BUILTIN_VPHADDDQ,
30547 IX86_BUILTIN_VPHADDUBW,
30548 IX86_BUILTIN_VPHADDUBD,
30549 IX86_BUILTIN_VPHADDUBQ,
30550 IX86_BUILTIN_VPHADDUWD,
30551 IX86_BUILTIN_VPHADDUWQ,
30552 IX86_BUILTIN_VPHADDUDQ,
30553 IX86_BUILTIN_VPHSUBBW,
30554 IX86_BUILTIN_VPHSUBWD,
30555 IX86_BUILTIN_VPHSUBDQ,
30557 IX86_BUILTIN_VPROTB,
30558 IX86_BUILTIN_VPROTW,
30559 IX86_BUILTIN_VPROTD,
30560 IX86_BUILTIN_VPROTQ,
30561 IX86_BUILTIN_VPROTB_IMM,
30562 IX86_BUILTIN_VPROTW_IMM,
30563 IX86_BUILTIN_VPROTD_IMM,
30564 IX86_BUILTIN_VPROTQ_IMM,
30566 IX86_BUILTIN_VPSHLB,
30567 IX86_BUILTIN_VPSHLW,
30568 IX86_BUILTIN_VPSHLD,
30569 IX86_BUILTIN_VPSHLQ,
30570 IX86_BUILTIN_VPSHAB,
30571 IX86_BUILTIN_VPSHAW,
30572 IX86_BUILTIN_VPSHAD,
30573 IX86_BUILTIN_VPSHAQ,
30575 IX86_BUILTIN_VFRCZSS,
30576 IX86_BUILTIN_VFRCZSD,
30577 IX86_BUILTIN_VFRCZPS,
30578 IX86_BUILTIN_VFRCZPD,
30579 IX86_BUILTIN_VFRCZPS256,
30580 IX86_BUILTIN_VFRCZPD256,
30582 IX86_BUILTIN_VPCOMEQUB,
30583 IX86_BUILTIN_VPCOMNEUB,
30584 IX86_BUILTIN_VPCOMLTUB,
30585 IX86_BUILTIN_VPCOMLEUB,
30586 IX86_BUILTIN_VPCOMGTUB,
30587 IX86_BUILTIN_VPCOMGEUB,
30588 IX86_BUILTIN_VPCOMFALSEUB,
30589 IX86_BUILTIN_VPCOMTRUEUB,
30591 IX86_BUILTIN_VPCOMEQUW,
30592 IX86_BUILTIN_VPCOMNEUW,
30593 IX86_BUILTIN_VPCOMLTUW,
30594 IX86_BUILTIN_VPCOMLEUW,
30595 IX86_BUILTIN_VPCOMGTUW,
30596 IX86_BUILTIN_VPCOMGEUW,
30597 IX86_BUILTIN_VPCOMFALSEUW,
30598 IX86_BUILTIN_VPCOMTRUEUW,
30600 IX86_BUILTIN_VPCOMEQUD,
30601 IX86_BUILTIN_VPCOMNEUD,
30602 IX86_BUILTIN_VPCOMLTUD,
30603 IX86_BUILTIN_VPCOMLEUD,
30604 IX86_BUILTIN_VPCOMGTUD,
30605 IX86_BUILTIN_VPCOMGEUD,
30606 IX86_BUILTIN_VPCOMFALSEUD,
30607 IX86_BUILTIN_VPCOMTRUEUD,
30609 IX86_BUILTIN_VPCOMEQUQ,
30610 IX86_BUILTIN_VPCOMNEUQ,
30611 IX86_BUILTIN_VPCOMLTUQ,
30612 IX86_BUILTIN_VPCOMLEUQ,
30613 IX86_BUILTIN_VPCOMGTUQ,
30614 IX86_BUILTIN_VPCOMGEUQ,
30615 IX86_BUILTIN_VPCOMFALSEUQ,
30616 IX86_BUILTIN_VPCOMTRUEUQ,
30618 IX86_BUILTIN_VPCOMEQB,
30619 IX86_BUILTIN_VPCOMNEB,
30620 IX86_BUILTIN_VPCOMLTB,
30621 IX86_BUILTIN_VPCOMLEB,
30622 IX86_BUILTIN_VPCOMGTB,
30623 IX86_BUILTIN_VPCOMGEB,
30624 IX86_BUILTIN_VPCOMFALSEB,
30625 IX86_BUILTIN_VPCOMTRUEB,
30627 IX86_BUILTIN_VPCOMEQW,
30628 IX86_BUILTIN_VPCOMNEW,
30629 IX86_BUILTIN_VPCOMLTW,
30630 IX86_BUILTIN_VPCOMLEW,
30631 IX86_BUILTIN_VPCOMGTW,
30632 IX86_BUILTIN_VPCOMGEW,
30633 IX86_BUILTIN_VPCOMFALSEW,
30634 IX86_BUILTIN_VPCOMTRUEW,
30636 IX86_BUILTIN_VPCOMEQD,
30637 IX86_BUILTIN_VPCOMNED,
30638 IX86_BUILTIN_VPCOMLTD,
30639 IX86_BUILTIN_VPCOMLED,
30640 IX86_BUILTIN_VPCOMGTD,
30641 IX86_BUILTIN_VPCOMGED,
30642 IX86_BUILTIN_VPCOMFALSED,
30643 IX86_BUILTIN_VPCOMTRUED,
30645 IX86_BUILTIN_VPCOMEQQ,
30646 IX86_BUILTIN_VPCOMNEQ,
30647 IX86_BUILTIN_VPCOMLTQ,
30648 IX86_BUILTIN_VPCOMLEQ,
30649 IX86_BUILTIN_VPCOMGTQ,
30650 IX86_BUILTIN_VPCOMGEQ,
30651 IX86_BUILTIN_VPCOMFALSEQ,
30652 IX86_BUILTIN_VPCOMTRUEQ,
30654 /* LWP instructions. */
30655 IX86_BUILTIN_LLWPCB,
30656 IX86_BUILTIN_SLWPCB,
30657 IX86_BUILTIN_LWPVAL32,
30658 IX86_BUILTIN_LWPVAL64,
30659 IX86_BUILTIN_LWPINS32,
30660 IX86_BUILTIN_LWPINS64,
30662 IX86_BUILTIN_CLZS,
30664 /* RTM */
30665 IX86_BUILTIN_XBEGIN,
30666 IX86_BUILTIN_XEND,
30667 IX86_BUILTIN_XABORT,
30668 IX86_BUILTIN_XTEST,
30670 /* MPX */
30671 IX86_BUILTIN_BNDMK,
30672 IX86_BUILTIN_BNDSTX,
30673 IX86_BUILTIN_BNDLDX,
30674 IX86_BUILTIN_BNDCL,
30675 IX86_BUILTIN_BNDCU,
30676 IX86_BUILTIN_BNDRET,
30677 IX86_BUILTIN_BNDNARROW,
30678 IX86_BUILTIN_BNDINT,
30679 IX86_BUILTIN_SIZEOF,
30680 IX86_BUILTIN_BNDLOWER,
30681 IX86_BUILTIN_BNDUPPER,
30683 /* BMI instructions. */
30684 IX86_BUILTIN_BEXTR32,
30685 IX86_BUILTIN_BEXTR64,
30686 IX86_BUILTIN_CTZS,
30688 /* TBM instructions. */
30689 IX86_BUILTIN_BEXTRI32,
30690 IX86_BUILTIN_BEXTRI64,
30692 /* BMI2 instructions. */
30693 IX86_BUILTIN_BZHI32,
30694 IX86_BUILTIN_BZHI64,
30695 IX86_BUILTIN_PDEP32,
30696 IX86_BUILTIN_PDEP64,
30697 IX86_BUILTIN_PEXT32,
30698 IX86_BUILTIN_PEXT64,
30700 /* ADX instructions. */
30701 IX86_BUILTIN_ADDCARRYX32,
30702 IX86_BUILTIN_ADDCARRYX64,
30704 /* SBB instructions. */
30705 IX86_BUILTIN_SBB32,
30706 IX86_BUILTIN_SBB64,
30708 /* FSGSBASE instructions. */
30709 IX86_BUILTIN_RDFSBASE32,
30710 IX86_BUILTIN_RDFSBASE64,
30711 IX86_BUILTIN_RDGSBASE32,
30712 IX86_BUILTIN_RDGSBASE64,
30713 IX86_BUILTIN_WRFSBASE32,
30714 IX86_BUILTIN_WRFSBASE64,
30715 IX86_BUILTIN_WRGSBASE32,
30716 IX86_BUILTIN_WRGSBASE64,
30718 /* RDRND instructions. */
30719 IX86_BUILTIN_RDRAND16_STEP,
30720 IX86_BUILTIN_RDRAND32_STEP,
30721 IX86_BUILTIN_RDRAND64_STEP,
30723 /* RDSEED instructions. */
30724 IX86_BUILTIN_RDSEED16_STEP,
30725 IX86_BUILTIN_RDSEED32_STEP,
30726 IX86_BUILTIN_RDSEED64_STEP,
30728 /* F16C instructions. */
30729 IX86_BUILTIN_CVTPH2PS,
30730 IX86_BUILTIN_CVTPH2PS256,
30731 IX86_BUILTIN_CVTPS2PH,
30732 IX86_BUILTIN_CVTPS2PH256,
30734   /* MONITORX and MWAITX instructions.  */
30735 IX86_BUILTIN_MONITORX,
30736 IX86_BUILTIN_MWAITX,
30738   /* CFString built-in for Darwin.  */
30739 IX86_BUILTIN_CFSTRING,
30741 /* Builtins to get CPU type and supported features. */
30742 IX86_BUILTIN_CPU_INIT,
30743 IX86_BUILTIN_CPU_IS,
30744 IX86_BUILTIN_CPU_SUPPORTS,
30746 /* Read/write FLAGS register built-ins. */
30747 IX86_BUILTIN_READ_FLAGS,
30748 IX86_BUILTIN_WRITE_FLAGS,
30750   IX86_BUILTIN_MAX
30751 };
30753 /* Table for the ix86 builtin decls. */
30754 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30756 /* Table of all of the builtin functions that are possible with different ISAs,
30757    but are waiting to be built until a function is declared to use that
30758    ISA.  */
30759 struct builtin_isa {
30760 const char *name; /* function name */
30761 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30762 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30763 bool const_p; /* true if the declaration is constant */
30764 bool leaf_p; /* true if the declaration has leaf attribute */
30765 bool nothrow_p; /* true if the declaration has nothrow attribute */
30766 bool set_and_not_built_p; /* true if the declaration is recorded but not yet built */
30767 };
30769 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30771 /* ISA bits that can still trigger the inclusion of a deferred builtin.  */
30772 static HOST_WIDE_INT deferred_isa_values = 0;
30774 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
30775    MASK of isa_flags in the ix86_builtins_isa array.  Store the function
30776    decl in the ix86_builtins array.  Return the function decl, or NULL_TREE
30777    if the builtin was not added.

30779    If the front end has a special hook for builtin functions, delay adding
30780    builtin functions that aren't in the current ISA until the ISA is changed
30781    with function-specific optimization.  Doing so can save about 300K for the
30782    default compiler.  When the builtin is expanded, check at that time whether
30783    it is valid.

30785    If the front end doesn't have a special hook, record all builtins, even
30786    those that aren't in the current ISA, in case the user uses function-specific
30787    options for a different ISA, so that we don't get scope errors if a builtin
30788    is added in the middle of a function scope.  */
30790 static inline tree
30791 def_builtin (HOST_WIDE_INT mask, const char *name,
30792 enum ix86_builtin_func_type tcode,
30793 enum ix86_builtins code)
30794 {
30795   tree decl = NULL_TREE;

30797   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30798     {
30799       ix86_builtins_isa[(int) code].isa = mask;

30801       mask &= ~OPTION_MASK_ISA_64BIT;
30802       if (mask == 0
30803 	  || (mask & ix86_isa_flags) != 0
30804 	  || (lang_hooks.builtin_function
30805 	      == lang_hooks.builtin_function_ext_scope))
30807 	{
30808 	  tree type = ix86_get_builtin_func_type (tcode);
30809 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30810 				       NULL, NULL_TREE);
30811 	  ix86_builtins[(int) code] = decl;
30812 	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30813 	}
30814       else
30815 	{
30816 	  /* Only a MASK recorded with set_and_not_built_p == true can
30817 	     still lead to a builtin being included later.  */
30818 	  deferred_isa_values |= mask;
30819 	  ix86_builtins[(int) code] = NULL_TREE;
30820 	  ix86_builtins_isa[(int) code].tcode = tcode;
30821 	  ix86_builtins_isa[(int) code].name = name;
30822 	  ix86_builtins_isa[(int) code].leaf_p = false;
30823 	  ix86_builtins_isa[(int) code].nothrow_p = false;
30824 	  ix86_builtins_isa[(int) code].const_p = false;
30825 	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30826 	}
30827     }

30829   return decl;
30830 }
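/* Illustrative sketch (editorial addition, not part of the original source):
   a typical registration call.  The builtin name and enum code below are
   hypothetical placeholders, not real table entries:

     def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_example",
		  V8SI_FTYPE_V8SI_V8SI, IX86_BUILTIN_EXAMPLE);

   If AVX2 is already in ix86_isa_flags (or the front end uses the
   extern-scope hook), the decl is built immediately; otherwise the mask
   is accumulated in deferred_isa_values and the decl is created later
   by ix86_add_new_builtins.  */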
30832 /* Like def_builtin, but also marks the function decl "const". */
30834 static inline tree
30835 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30836 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30837 {
30838   tree decl = def_builtin (mask, name, tcode, code);
30839   if (decl)
30840     TREE_READONLY (decl) = 1;
30841   else
30842     ix86_builtins_isa[(int) code].const_p = true;

30844   return decl;
30845 }
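/* Editorial note: when the decl is deferred, only const_p is recorded
   here; ix86_add_new_builtins below applies TREE_READONLY once the
   deferred decl is actually built.  */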
30847 /* Add any new builtin functions for a given ISA that may not have been
30848    declared.  This saves a bit of space compared to adding all of the
30849    declarations to the tree up front, whether or not they are used.  */
30851 static void
30852 ix86_add_new_builtins (HOST_WIDE_INT isa)
30853 {
30854   if ((isa & deferred_isa_values) == 0)
30855     return;

30857   /* Bits handled now can be removed from the set of deferred isa values.  */
30858   deferred_isa_values &= ~isa;

30860   int i;
30861   tree saved_current_target_pragma = current_target_pragma;
30862   current_target_pragma = NULL_TREE;

30864   for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
30865     {
30866       if ((ix86_builtins_isa[i].isa & isa) != 0
30867 	  && ix86_builtins_isa[i].set_and_not_built_p)
30868 	{
30869 	  tree decl, type;

30871 	  /* Don't define the builtin again.  */
30872 	  ix86_builtins_isa[i].set_and_not_built_p = false;

30874 	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30875 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30876 						 type, i, BUILT_IN_MD, NULL,
30877 						 NULL_TREE);

30879 	  ix86_builtins[i] = decl;
30880 	  if (ix86_builtins_isa[i].const_p)
30881 	    TREE_READONLY (decl) = 1;
30882 	  if (ix86_builtins_isa[i].leaf_p)
30883 	    DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30884 						      NULL_TREE);
30885 	  if (ix86_builtins_isa[i].nothrow_p)
30886 	    TREE_NOTHROW (decl) = 1;
30887 	}
30888     }

30890   current_target_pragma = saved_current_target_pragma;
30891 }
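/* Editorial note: this is the counterpart of the deferral in def_builtin.
   When function-specific options (e.g. a target attribute or pragma,
   presumably via the target-attribute handling code) enable ISA bits that
   were previously deferred, the corresponding builtins are materialized
   here at extern scope before they can be expanded.  */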
30893 /* Bits for builtin_description.flag. */
30895 /* Set when we don't support the comparison natively, and should
30896    swap the comparison operands in order to support it.  */
30897 #define BUILTIN_DESC_SWAP_OPERANDS 1
30899 struct builtin_description
30900 {
30901 const HOST_WIDE_INT mask;
30902 const enum insn_code icode;
30903 const char *const name;
30904 const enum ix86_builtins code;
30905 const enum rtx_code comparison;
30906 const int flag;
30907 };
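/* Illustrative sketch (editorial addition): an entry using
   BUILTIN_DESC_SWAP_OPERANDS would describe, say, a less-than comparison
   that the hardware only implements as greater-than; the expander then
   emits the native pattern with the two input operands exchanged, since
   x < y is equivalent to y > x.  The entry below is a hypothetical
   example, not one of the real tables:

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_example, "__builtin_ia32_example",
       IX86_BUILTIN_EXAMPLE, LT, BUILTIN_DESC_SWAP_OPERANDS },
*/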
30909 static const struct builtin_description bdesc_comi[] =
30910 {
30911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30915 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30916 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30918 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30919 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30920 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30921 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30926 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30928 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30929 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30931 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30934 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30935 };
30937 static const struct builtin_description bdesc_pcmpestr[] =
30938 {
30939 /* SSE4.2 */
30940 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30941 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30942 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30943 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30944 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30945 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30946 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30947 };
30949 static const struct builtin_description bdesc_pcmpistr[] =
30950 {
30951 /* SSE4.2 */
30952 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30953 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30954 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30955 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30956 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30958 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30959 };
30961 /* Special builtins with variable number of arguments. */
30962 static const struct builtin_description bdesc_special_args[] =
30963 {
30964 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30965 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30966 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30968 /* 80387 (for use internally for atomic compound assignment). */
30969 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30970 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30971 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30972 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30974 /* MMX */
30975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30977 /* 3DNow! */
30978 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30980 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30981 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30982 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30983 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30984 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30985 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30986 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30987 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30988 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30990 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30991 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30992 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30993 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30994 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30995 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30996 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30997 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30999 /* SSE */
31000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31009 /* SSE or 3DNow!A */
31010 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31011 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
31013 /* SSE2 */
31014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
31018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
31020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
31021 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
31022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
31023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31028 /* SSE3 */
31029 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31031 /* SSE4.1 */
31032 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
31034 /* SSE4A */
31035 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31036 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31038 /* AVX */
31039 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
31040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
31042 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31043 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31044 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
31046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
31048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
31054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
31057 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
31061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
31062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
31063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
31064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
31065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
31066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
31067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
31069 /* AVX2 */
31070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
31071 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
31072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
31073 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
31074 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
31075 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
31076 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
31077 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
31078 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
31080 /* AVX512F */
31081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
31082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
31083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
31084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
31085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
31086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
31087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
31088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
31089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
31090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
31091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
31092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
31093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
31094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
31095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
31096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
31097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
31098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
31099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
31100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
31101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
31102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
31103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
31104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
31105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
31106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
31107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
31108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
31109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
31110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
31111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
31112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
31113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
31114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
31115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
31116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
31117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
31118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
31119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
31120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
31121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
31122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
31123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
31124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
31125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
31126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
31127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
31129 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
31130 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
31131 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
31132 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
31133 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
31134 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
31136 /* FSGSBASE */
31137 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31138 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31139 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31140 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31141 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31142 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31143 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31144 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31146 /* RTM */
31147 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31148 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31149 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31151 /* AVX512BW */
31152 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
31153 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
31154 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
31155 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
31157 /* AVX512VL */
31158 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
31159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
31160 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
31161 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
31162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
31163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
31164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
31165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
31166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
31167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
31168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
31169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
31170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
31171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
31172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
31173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
31174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
31175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
31176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
31177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
31178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
31179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
31180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
31181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
31182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
31183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
31184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
31185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
31186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
31187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
31188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
31189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
31190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
31191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
31192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
31193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
31194 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
31195 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
31196 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
31197 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
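/* A note on the next two groups, reflecting the AVX-512 semantics these
   builtins expose: the compress stores pack just the mask-selected
   elements and write them contiguously to memory, while the expand
   loads are the inverse, reading consecutive elements and scattering
   them into the selected lanes (the _maskz variants zero the
   unselected lanes instead of keeping the old destination).  */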
31198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
31199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
31200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
31201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
31202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
31203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
31204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
31205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
31206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
31207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
31208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
31209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
31210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
31211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
31212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
31213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
31214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
31215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
31216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
31217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
31218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
31219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
31220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
31221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
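/* The pmov*mem builtins below store a narrowed copy of the source
   vector under mask: plain truncation, or signed ("s") / unsigned
   ("us") saturation of each element to the smaller width.  */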
31222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
31223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
31224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
31225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
31226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
31227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
31228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
31229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
31230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
31231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
31232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
31233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
31234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
31235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
31236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
31237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
31238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
31239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
31240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
31241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
31242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
31243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
31244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
31245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
31246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
31247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
31248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
31249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
31250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
31251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
31253 /* PCOMMIT. */
31254 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31255 };
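/* Each initializer in these tables is a builtin_description: the
   OPTION_MASK_ISA_* bits that must be enabled for the builtin to
   exist, the insn code used to expand it (CODE_FOR_nothing for
   builtins expanded by hand), the user-visible name, the
   IX86_BUILTIN_* enumerator, an rtx comparison code for the compare
   entries (UNKNOWN elsewhere), and the function type, cast to int.  */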
31257 /* Builtins with variable number of arguments. */
31258 static const struct builtin_description bdesc_args[] =
31259 {
31260 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31261 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31262 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31263 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31264 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31265 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31266 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31268 /* MMX */
31269 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31276 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31278 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31282 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31283 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31295 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31298 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
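/* Each shift below is listed twice with the same insn pattern: the *i
   forms (..._SI_COUNT) take the count as an integer, while the
   register forms (..._V4HI_COUNT and friends) take it in an MMX
   register; the _COUNT suffix tells the expander how to treat the
   count operand.  */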
31313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31332 /* 3DNow! */
31333 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31334 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31335 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31336 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31338 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31339 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31340 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31341 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31342 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31343 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31344 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31345 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31346 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31347 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31348 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31349 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31350 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31351 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31352 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31354 /* 3DNow!A */
31355 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31356 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31357 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31358 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31359 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31360 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31362 /* SSE */
31363 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31365 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31366 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31367 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31368 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31371 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31372 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31373 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31374 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31378 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31379 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31380 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31381 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31382 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31384 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31385 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
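/* cmpps only implements EQ/LT/LE/UNORD and their negations, so the
   greater-than builtins below reuse the less-than codes with the
   operands swapped (the _SWAP suffix), and the negated orderings use
   the unordered codes: !(a < b) is UNGE, which is also true when
   either operand is a NaN.  */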
31387 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31388 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31389 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31390 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31391 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31392 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31393 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31394 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31395 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31397 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31408 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31409 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31413 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31415 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31416 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31418 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31423 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31424 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31428 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31430 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
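/* The _VEC_MERGE entries below operate on element 0 only; the
   expander supplies the single input a second time as the source of
   the untouched upper elements.  */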
31432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
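/* A null name means the registration loop skips the entry; these two
   are attached by hand to __builtin_fabsq and __builtin_copysignq so
   they can be declared with the __float128 type.  */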
31436 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31437 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31439 /* SSE MMX or 3DNow!A */
31440 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31441 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31442 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31444 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31445 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31446 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31447 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31449 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31450 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31452 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31454 /* SSE2 */
31455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31461 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31473 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31474 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31478 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31480 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31481 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31482 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31483 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31511 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31515 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31517 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31518 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31520 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31523 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31524 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31526 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31528 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31529 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31530 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31531 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31532 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31533 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31534 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31535 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31546 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31547 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31549 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31551 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31552 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31564 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31565 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31566 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31569 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31570 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31571 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31572 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31573 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31574 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31575 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31576 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31582 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31586 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31591 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
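/* pslldqi128/psrldqi128 shift the whole 128-bit value; the insn
   patterns work on V1TImode, and the _INT_CONVERT suffix makes the
   expander reinterpret the V2DI operands in that mode.  Note that the
   count here is in bits: the byte-granular intrinsics scale their
   argument by 8 before calling these builtins.  */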
31595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31596 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31597 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31598 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31599 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31600 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31601 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31604 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31605 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31606 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31607 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31608 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31609 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31611 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31612 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31613 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31614 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31624 /* SSE2 MMX */
31625 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31626 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31628 /* SSE3 */
31629 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31630 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31632 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31633 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31634 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31635 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31636 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31637 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
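/* SSE3 semantics, for reference: haddps/haddpd sum adjacent element
   pairs (low half of the result from the first operand, high half from
   the second), while addsubps/addsubpd subtract in even-numbered lanes
   and add in odd-numbered ones.  */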
31639 /* SSSE3 */
31640 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31641 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31642 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31643 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31644 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31645 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31647 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31648 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31649 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31650 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31651 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31652 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31653 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31654 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31655 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31656 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31657 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31658 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31659 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31660 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31661 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31662 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31663 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31664 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31665 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31666 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31667 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31668 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31669 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31670 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31672 /* SSSE3 palignr */
31673 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31674 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
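/* As elsewhere in this table, each SSSE3 operation appears twice: a
   128-bit XMM form ("...128") and a 64-bit MMX form (unsuffixed).  Like
   the byte shifts above, palignr's immediate is a bit count; tmmintrin.h
   multiplies the intrinsic's byte offset by 8 before calling the
   builtin.  */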
31676 /* SSE4.1 */
31677 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31678 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31679 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31680 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31681 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31682 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31683 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31684 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31685 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31686 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31688 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31689 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31690 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31691 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31692 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31693 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31694 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31695 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31696 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31697 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31698 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31699 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31700 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31702 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31703 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31704 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31705 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31706 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31707 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31708 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31709 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31710 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31711 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31713 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31715 /* SSE4.1 round and ptest */
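/* These entries are gated by OPTION_MASK_ISA_ROUND rather than
   OPTION_MASK_ISA_SSE4_1 directly.  For the floor/ceil/trunc/rint
   variants the comparison-code slot is reused to carry the ROUND_*
   immediate to the expander (see ix86_expand_sse_round), so a single
   round insn pattern serves several builtins.  */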
31716 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31717 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31718 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31719 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31721 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31722 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31723 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31724 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31726 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31727 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31729 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31730 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31732 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31733 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31734 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31735 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31737 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31738 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31740 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31741 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31743 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31744 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31745 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
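/* For the ptest builtins the rtx code selects which flag the expander
   tests after emitting PTEST: EQ for ZF (ptestz), LTU for CF (ptestc),
   and GTU for "neither ZF nor CF set" (ptestnzc).  */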
31747 /* SSE4.2 */
31748 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31749 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31750 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31751 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31752 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
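/* OPTION_MASK_ISA_64BIT keeps the DImode crc32 builtin undeclared on
   32-bit targets: def_builtin skips 64BIT-masked entries unless
   TARGET_64BIT.  */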
31754 /* SSE4A */
31755 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31756 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31757 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31758 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31760 /* AES */
31761 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31762 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31764 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31765 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31766 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31767 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31769 /* PCLMUL */
31770 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
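/* The AES and PCLMUL entries have a null name on purpose: the
   user-visible builtins are declared separately under their own ISA
   masks (OPTION_MASK_ISA_AES / OPTION_MASK_ISA_PCLMUL), and these
   records only bind the builtin codes to insn patterns for
   expansion.  */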
31772 /* AVX */
31773 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31774 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31777 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31778 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31781 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31787 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31788 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31789 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31790 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31791 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31792 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31793 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31794 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31795 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31796 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31797 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31798 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31821 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31822 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31826 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31828 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31844 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31846 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31848 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31860 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31861 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31874 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31875 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31885 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31886 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31887 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
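/* The vtest/ptest entries above use the same EQ/LTU/GTU flag encoding
   as the SSE4.1 ptest builtins earlier in the table.  */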
31905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31908 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31909 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31911 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31913 /* AVX2 */
31914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31915 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31916 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31917 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31922 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31923 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31924 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31925 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31931 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31953 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31954 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31955 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31956 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31957 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31958 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31959 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31960 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31961 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31962 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31963 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31964 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31978 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31979 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31980 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31981 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31982 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31983 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31984 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31985 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31989 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31990 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31991 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31992 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31993 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31994 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31995 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31996 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31997 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31998 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31999 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32000 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32001 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32002 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32003 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32004 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32005 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32006 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32007 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32008 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32009 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32010 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32011 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
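/* The 256-bit byte shifts (pslldqi256/psrldqi256) expand through V2TI
   patterns: each 128-bit lane is shifted independently, matching the
   VPSLLDQ/VPSRLDQ hardware behavior.  The _COUNT and _INT_CONVERT
   conventions are the same as in the SSE2 block above.  */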
32012 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32013 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32014 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32015 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32016 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32017 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32018 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32020 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32023 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32028 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
32031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
32032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
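
/* A hedged usage sketch, not part of the table: the ashlv/ashrv/lshrv
   rows above expose AVX2's per-lane variable shifts.  Assuming the
   usual GCC vector-extension typedef and -mavx2,

     typedef int __v4si __attribute__ ((__vector_size__ (16)));

     __v4si
     shift_each_left (__v4si v, __v4si n)
     {
       return __builtin_ia32_psllv4si (v, n);
     }

   shifts each lane of V left by the corresponding lane of N.  */
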
{ OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
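
/* Sketch (assumes -mlzcnt): __builtin_clzs above is the 16-bit LZCNT
   count, and unlike the generic __builtin_clz family it is well
   defined for a zero input, where LZCNT yields the operand width, 16:

     unsigned short
     leading_zeros16 (unsigned short x)
     {
       return __builtin_clzs (x);
     }
 */
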
/* BMI */
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
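
/* Sketch (assumes -mbmi): BEXTR packs the field start into bits 0..7
   of its control operand and the field length into bits 8..15, so

     unsigned int
     extract_bits_8_to_15 (unsigned int x)
     {
       return __builtin_ia32_bextr_u32 (x, (8 << 8) | 8);
     }

   returns bits 8..15 of X.  The TBM bextri variants just below take
   the same packed control, but as a compile-time constant.  */
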
/* TBM */
{ OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

/* F16C */
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
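
/* Sketch (assumes -mf16c): a half-precision round trip through the
   conversion rows above.  Immediate 0 selects round-to-nearest-even
   for the narrowing direction, and only the low four halves of the
   __v8hi intermediate are significant:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));
     typedef short __v8hi __attribute__ ((__vector_size__ (16)));

     __v4sf
     round_trip (__v4sf v)
     {
       return __builtin_ia32_vcvtph2ps (__builtin_ia32_vcvtps2ph (v, 0));
     }
 */
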
/* BMI2 */
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
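
/* Sketch (assumes -mbmi2): PDEP scatters the low bits of X into the
   positions selected by MASK, and PEXT gathers them back, so the pair
   is an identity on X's low popcount(MASK) bits:

     unsigned int
     scatter_gather (unsigned int x, unsigned int mask)
     {
       return __builtin_ia32_pext_si (__builtin_ia32_pdep_si (x, mask),
                                      mask);
     }
 */
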
/* AVX512F */
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
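
/* Naming note for the AVX512 rows: a _mask suffix means the builtin
   takes an extra merge source plus a write mask, and lanes whose mask
   bit is clear keep the source element; _maskz variants zero those
   lanes instead.  A hedged sketch for the paddd512 entry above,
   assuming -mavx512f:

     typedef int __v16si __attribute__ ((__vector_size__ (64)));

     __v16si
     masked_add (__v16si a, __v16si b, __v16si src, unsigned short m)
     {
       return __builtin_ia32_paddd512_mask (a, b, src, m);
     }
 */
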
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },

/* Mask arithmetic operations */
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
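
/* Sketch (assumes -mavx512f): the k-register helpers above operate on
   16-bit masks (__mmask16 is unsigned short underneath), e.g.

     unsigned short
     mask_andn (unsigned short a, unsigned short b)
     {
       return __builtin_ia32_kandhi (a, __builtin_ia32_knothi (b));
     }
 */
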
/* SHA */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
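
/* The SHA rows carry a null name on purpose: nameless entries are
   skipped when this table is walked to declare builtins, and the
   __builtin_ia32_sha* declarations appear to be made by a separate
   block (gated on the SHA ISA) elsewhere in this file; these rows
   then only drive expansion via their IX86_BUILTIN_* codes.  */
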
/* AVX512VL. */
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
{ OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
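
/* Sketch: an ORed ISA mask such as OPTION_MASK_ISA_AVX512DQ
   | OPTION_MASK_ISA_AVX512VL reflects that the masked 128/256-bit
   forms need both extensions, e.g. assuming -mavx512dq -mavx512vl:

     typedef double __v4df __attribute__ ((__vector_size__ (32)));

     __v4df
     masked_xor (__v4df a, __v4df b, __v4df src, unsigned char m)
     {
       return __builtin_ia32_xorpd256_mask (a, b, src, m);
     }
 */
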
32361 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
32362 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
32363 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32364 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
32365 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
32370 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
32371 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
32372 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
32373 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
32374 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
32375 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
32376 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
32377 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
32378 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
32379 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
32382 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
32383 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
32384 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
32385 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
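  /* Masked broadcasts.  The *_gpr_mask forms take the scalar source in a
     general-purpose register rather than a vector lane.  */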
32406 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
32407 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
32408 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
32409 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
32410 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
32411 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
32412 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
32413 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
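  /* Masked 128-bit lane extracts and inserts.  */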
32425 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
32426 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
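  /* Masked sign (pmovsx*) and zero (pmovzx*) extension to wider
     elements.  */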
32429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
32430 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
32442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
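  /* AVX512DQ VREDUCE builtins; the INT operand is the imm8 control.  */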
32453 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32454 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
32455 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
32456 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
32457 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32458 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
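  /* Variable one- and two-source permutes of 16-bit elements (AVX512BW).
     vpermi2var overwrites the index operand, vpermt2var the first table
     operand.  */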
32459 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32460 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32461 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32462 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32463 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32464 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32465 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32466 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
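  /* Reciprocal and reciprocal square root approximations (2^-14 relative
     error), plus masked square roots.  */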
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
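  /* Masked integer add/sub, including signed (ss*) and unsigned (us*)
     saturating forms; 128-bit variants first, then 256-bit.  These are
     the expanders behind the _mm*_mask_add/sub_epi* intrinsic wrappers
     in the avx512vl*intrin.h headers.  */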
32479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32483 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32484 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32492 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32493 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32494 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32501 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32502 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32508 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
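  /* Shuffles of whole 128-bit lanes selected by an immediate.  */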
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
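  /* Narrowing down-conversions (VPMOV*): plain truncation, signed
     saturating (s) and unsigned saturating (us) variants.  */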
32515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
32516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
32517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
32519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
32520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
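  /* VRANGE (imm8 selects min/max and sign control) and VGETEXP (extract
     exponents as floating-point values).  */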
32551 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
32552 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
32553 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
32554 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
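  /* VFIXUPIMM special-value fixup (lookup table in the integer operand)
     and 64/32-bit element absolute value.  */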
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
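  /* Masked multiplies: rounded-scaled high parts (pmulhrsw),
     unsigned/signed high parts, low parts, and the AVX512DQ 64-bit low
     multiply (pmullq).  */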
32571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32578 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32579 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32580 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
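  /* AVX512DQ masked floating-point AND/ANDNOT.  */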
32581 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32582 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32583 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32584 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32585 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32586 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32587 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32588 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
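  /* Masked shifts.  Each shift appears twice: an immediate-count form
     (e.g. psllwi, with an INT operand) and a form taking the count from
     the low quadword of an XMM operand.  */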
32589 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
32592 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32595 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32596 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
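  /* Masked bitwise AND/ANDN/OR/XOR on dword and qword elements,
     interleaved with the logical right shifts.  */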
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
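  /* Pack words to bytes with signed (packsswb) or unsigned (packuswb)
     saturation.  */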
32633 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
32634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
32636 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
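  /* VRNDSCALE: round to the number of fraction bits given by the
     imm8.  */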
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
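  /* VPTERNLOG three-operand bitwise ops; the imm8 encodes the truth
     table.  The _maskz forms zero masked-off elements instead of
     keeping the destination.  */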
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
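  /* VSCALEF: element-wise x * 2^floor(y).  */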
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
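  /* Masked FMA families.  The _mask forms keep operand 1 for masked-off
     elements, _mask3 keeps operand 3, and _maskz zeroes them.  */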
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
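  /* Masked 128-bit (64x2) lane inserts (AVX512DQ).  */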
32697 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
32698 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
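  /* AVX512BW per-element variable shifts, plus multiply-add builtins
     (pmaddubsw: unsigned * signed byte pairs summed to words with
     saturation; pmaddwd: signed word pairs summed to dwords).  */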
32699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32700 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
32702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
32703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
32704 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
32705 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32706 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
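  /* Non-truncating float->integer conversions (rounding per MXCSR) and
     AVX512DQ float->64-bit integer conversions.  */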
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
32711 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
32712 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
32713 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
32714 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32725 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
32726 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
32727 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
32728 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
32729 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
32730 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
32731 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
32732 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
32757 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32758 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32759 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32760 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32761 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
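   /* Even the unmasked rotate intrinsics route through these _mask
      builtins; roughly (paraphrased, under the assumption that the header
      supplies a zero merge vector and an all-ones mask):

	extern __inline __m256i
	_mm256_rol_epi64 (__m256i __A, const int __B)
	{
	  return (__m256i) __builtin_ia32_prolq256_mask
	    ((__v4di) __A, __B, (__v4di) _mm256_setzero_si256 (),
	     (__mmask8) -1);
	}  */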
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32789 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32790 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
32791 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32792 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32793 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32794 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
32795 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32796 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32803 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32805 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
32806 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
32807 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32808 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
32809 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
32810 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32811 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
32812 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
32813 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
32814 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
32815 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
32816 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
32817 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
32818 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
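   /* The cvt*2mask builtins above implement the vpmov{b,w,d,q}2m
      instructions, which gather the sign bit of each element into a mask
      register.  Approximately (not verbatim from the headers), the
      128-bit byte form is exposed as:

	extern __inline __mmask16
	_mm_movepi8_mask (__m128i __A)
	{
	  return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
	}  */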
32819 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
32820 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
32821 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
32822 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
32823 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
32824 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
32825 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
32826 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
32827 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
32828 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
32829 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
32830 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
32835 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
32836 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
32837 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
32838 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
32839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
32840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
32841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
32843 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
32844 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
32845 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
32846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
32847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
32848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
32849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
32851 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
32852 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
32853 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
32854 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
32855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
32856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
32857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
32858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
32859 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
32860 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
32861 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
32862 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
32863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
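   /* The _maskz expand variants above carry no separate pass-through
      operand in the intrinsic signature; the header wrapper supplies a
      zero vector instead, so masked-off lanes read as zero.  An
      approximate sketch:

	extern __inline __m256d
	_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
	{
	  return (__m256d) __builtin_ia32_expanddf256_maskz
	    ((__v4df) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U);
	}  */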
32887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32903 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32904 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32905 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32906 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32907 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32908 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32909 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32910 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32911 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32912 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32913 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32914 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32915 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32916 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32917 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32918 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32919 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32920 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32921 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32922 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32930 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32931 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32932 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32933 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
32936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
32937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
32938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
32939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
32940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
32941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
32942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
32943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32951 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32952 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32953 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32954 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32955 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32956 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32957 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32958 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32960 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32961 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
32962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
32963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
32964 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
32965 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32966 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32967 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32968 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
32970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
32971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32972 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
32973 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
32974 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
32975 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
32976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
32977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
32978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
32979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
32980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
32981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
32982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
32984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32992 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
32993 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
32994 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
32995 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
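   /* The blendm builtins select per-element between two whole vectors
      under a mask, with no pass-through operand of their own; roughly
      (paraphrased), the 256-bit quadword form becomes:

	extern __inline __m256i
	_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
	{
	  return (__m256i) __builtin_ia32_blendmq_256_mask
	    ((__v4di) __A, (__v4di) __W, (__mmask8) __U);
	}  */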
32996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
32999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
33000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
33001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
33002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
33003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
33004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
33007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
33008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
33009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
33010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
33011 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
33012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
33013 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
33014 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
33015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
33016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
33017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
33018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
33019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
33020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
33021 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
33022 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
33023 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
33024 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
33025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
33026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
33028 /* AVX512DQ. */
33029 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33030 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33031 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
33032 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
33033 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
33034 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
33035 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
33036 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
33037 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
33038 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
33039 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33040 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
33041 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33042 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33043 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33044 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33045 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33046 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33047 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33048 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33049 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33050 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
33051 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
33052 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
33053 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
33054 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
33055 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
33056 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
33057 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
33058 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
33059 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
33061 /* AVX512BW. */
33062 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
33063 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
33064 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
33065 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33066 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33067 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
33068 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
33069 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
33070 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
33071 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
33072 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
33073 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
33074 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
33075 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
33076 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
33077 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
33078 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
33079 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
33080 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33081 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33082 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33083 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33084 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33085 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33086 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33087 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33088 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33089 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33090 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33091 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33092 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33093 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33094 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33095 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33096 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33097 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33098 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33099 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33100 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33101 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33102 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33103 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33104 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33105 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33106 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
33107 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
33108 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
33109 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33110 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33111 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33112 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33113 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33114 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
33115 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
33116 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
33117 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33118 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
33119 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
33120 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33121 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33122 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33123 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33124 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33125 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33126 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33127 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33128 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33129 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
33130 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33131 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
33132 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
33133 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
33134 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
33135 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
33136 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
33137 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
33138 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
33139 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
33140 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
33141 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
33142 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
33143 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
33144 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33145 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
33146 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
33147 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
33148 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
33149 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
33150 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
33151 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
33152 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
33154 /* AVX512IFMA */
33155 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33156 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33157 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33158 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33159 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33160 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33161 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33162 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33163 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33164 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33165 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33166 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33168 /* AVX512VBMI */
33169 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33170 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33171 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33172 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33173 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33174 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33175 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33176 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33177 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33178 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33179 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33180 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33181 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33182 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33183 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33184 };
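/* Note on table layout (applies to the tables above and below): each
   builtin_description entry gives the ISA option mask that must be
   enabled for the builtin to exist, the CODE_FOR_* insn pattern used to
   expand it, the user-visible builtin name, its IX86_BUILTIN_* code, an
   rtx comparison code (UNKNOWN when unused), and the prototype, encoded
   as an ix86_builtin_func_type cast to int.  */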
33186 /* Builtins with rounding support. */
33187 static const struct builtin_description bdesc_round_args[] =
33188 {
33189 /* AVX512F */
33190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
33195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
33196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
33197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
33198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33209 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33211 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33218 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33220 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33270 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33272 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33274 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33276 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33278 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33280 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33282 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33284 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33310 /* AVX512ER */
33311 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33312 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33313 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33314 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33315 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33316 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33317 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33318 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33319 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33320 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33322 /* AVX512DQ. */
33323 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33324 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33325 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33326 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33327 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33328 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33329 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33330 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33331 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33332 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33333 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33334 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33335 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33336 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33337 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33338 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33339 };
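/* Illustrative sketch: entries in bdesc_round_args carry one extra
   trailing INT operand holding the embedded rounding mode, so the
   avx512fintrin.h wrappers pass an _MM_FROUND_* constant last, roughly:

     __m512d r = __builtin_ia32_addpd512_mask (a, b, src, m,
		   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);  */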
33341 /* Builtins for MPX. */
33342 static const struct builtin_description bdesc_mpx[] =
33343 {
33344 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33345 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33346 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33347 };
33349 /* Const builtins for MPX. */
33350 static const struct builtin_description bdesc_mpx_const[] =
33351 {
33352 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33353 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33354 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33355 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33356 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33357 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33358 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33359 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33360 };
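/* Editorial note (not part of the original source): the entries in
   bdesc_mpx and bdesc_mpx_const above deliberately use
   (enum insn_code) 0 -- they have no insn pattern of their own and are
   expanded specially by the pointer-bounds instrumentation
   (-fcheck-pointer-bounds with -mmpx) rather than through the generic
   bdesc expanders.  */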
33362 /* FMA4 and XOP. */
33363 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33364 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33365 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33366 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33367 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33368 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33369 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33370 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33371 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33372 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33373 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33374 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33375 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33376 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33377 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33378 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33379 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33380 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33381 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33382 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33383 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33384 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33385 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33386 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33387 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33388 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33389 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33390 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33391 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33392 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33393 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33394 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33395 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33396 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33397 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33398 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33399 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33400 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33401 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33402 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33403 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33404 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33405 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33406 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33407 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33408 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33409 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33410 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33411 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33412 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33413 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33414 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33416 static const struct builtin_description bdesc_multi_arg[] =
33417 {
33418 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33419 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33420 UNKNOWN, (int)MULTI_ARG_3_SF },
33421 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33422 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33423 UNKNOWN, (int)MULTI_ARG_3_DF },
33425 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33426 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33427 UNKNOWN, (int)MULTI_ARG_3_SF },
33428 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33429 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33430 UNKNOWN, (int)MULTI_ARG_3_DF },
33432 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33433 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33434 UNKNOWN, (int)MULTI_ARG_3_SF },
33435 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33436 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33437 UNKNOWN, (int)MULTI_ARG_3_DF },
33438 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33439 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33440 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33441 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33442 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33443 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33445 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33446 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33447 UNKNOWN, (int)MULTI_ARG_3_SF },
33448 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33449 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33450 UNKNOWN, (int)MULTI_ARG_3_DF },
33451 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33452 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33453 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33454 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33455 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33456 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33615 };
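/* Editorial note (not part of the original source): several XOP rows
   above intentionally share one IX86_BUILTIN_* code.  For example both
   __builtin_ia32_vpcomneb and __builtin_ia32_vpcomneqb map to
   IX86_BUILTIN_VPCOMNEB with rtx code NE -- they are two accepted
   spellings of the same "not equal" comparison.  */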
33618 /* TM vector builtins. */
33620 /* Reuse the existing x86-specific `struct builtin_description' because
33621 we're lazy. Add casts to make them fit. */
33622 static const struct builtin_description bdesc_tm[] =
33623 {
33624 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33625 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33626 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33627 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33628 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33629 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33630 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33632 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33633 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33634 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33635 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33636 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33637 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33638 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33640 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33641 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33642 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33643 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33644 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33645 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33646 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33648 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33649 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33650 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33651 };
33653 /* TM callbacks. */
33655 /* Return the builtin decl needed to load a vector of TYPE. */
33657 static tree
33658 ix86_builtin_tm_load (tree type)
33659 {
33660 if (TREE_CODE (type) == VECTOR_TYPE)
33661 {
33662 switch (tree_to_uhwi (TYPE_SIZE (type)))
33663 {
33664 case 64:
33665 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33666 case 128:
33667 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33668 case 256:
33669 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33670 }
33671 }
33672 return NULL_TREE;
33673 }
33675 /* Return the builtin decl needed to store a vector of TYPE. */
33677 static tree
33678 ix86_builtin_tm_store (tree type)
33679 {
33680 if (TREE_CODE (type) == VECTOR_TYPE)
33681 {
33682 switch (tree_to_uhwi (TYPE_SIZE (type)))
33683 {
33684 case 64:
33685 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33686 case 128:
33687 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33688 case 256:
33689 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33690 }
33691 }
33692 return NULL_TREE;
33693 }
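/* Editorial note (not part of the original source): the two callbacks
   above let the transactional-memory lowering replace a plain vector
   load or store inside a __transaction_atomic block with the matching
   _ITM_* barrier from bdesc_tm.  A hedged sketch of the user-visible
   effect for the 128-bit case, assuming SSE:

       __m128 a, b;
       __transaction_atomic { a = b; }

   ends up going through BUILT_IN_TM_LOAD_M128 and BUILT_IN_TM_STORE_M128,
   i.e. the _ITM_RM128 and _ITM_WM128 entries registered above.  */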
33695 /* Initialize the transactional memory vector load/store builtins. */
33697 static void
33698 ix86_init_tm_builtins (void)
33699 {
33700 enum ix86_builtin_func_type ftype;
33701 const struct builtin_description *d;
33702 size_t i;
33703 tree decl;
33704 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33705 tree attrs_log, attrs_type_log;
33707 if (!flag_tm)
33708 return;
33710 /* If there are no builtins defined, we must be compiling in a
33711 language without trans-mem support. */
33712 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33713 return;
33715 /* Use whatever attributes a normal TM load has. */
33716 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33717 attrs_load = DECL_ATTRIBUTES (decl);
33718 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33719 /* Use whatever attributes a normal TM store has. */
33720 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33721 attrs_store = DECL_ATTRIBUTES (decl);
33722 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33723 /* Use whatever attributes a normal TM log has. */
33724 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33725 attrs_log = DECL_ATTRIBUTES (decl);
33726 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33728 for (i = 0, d = bdesc_tm;
33729 i < ARRAY_SIZE (bdesc_tm);
33730 i++, d++)
33731 {
33732 if ((d->mask & ix86_isa_flags) != 0
33733 || (lang_hooks.builtin_function
33734 == lang_hooks.builtin_function_ext_scope))
33735 {
33736 tree type, attrs, attrs_type;
33737 enum built_in_function code = (enum built_in_function) d->code;
33739 ftype = (enum ix86_builtin_func_type) d->flag;
33740 type = ix86_get_builtin_func_type (ftype);
33742 if (BUILTIN_TM_LOAD_P (code))
33743 {
33744 attrs = attrs_load;
33745 attrs_type = attrs_type_load;
33746 }
33747 else if (BUILTIN_TM_STORE_P (code))
33748 {
33749 attrs = attrs_store;
33750 attrs_type = attrs_type_store;
33751 }
33752 else
33753 {
33754 attrs = attrs_log;
33755 attrs_type = attrs_type_log;
33756 }
33757 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33758 /* The builtin without the prefix for
33759 calling it directly. */
33760 d->name + strlen ("__builtin_"),
33761 attrs);
33762 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33763 set the TYPE_ATTRIBUTES. */
33764 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33766 set_builtin_decl (code, decl, false);
33767 }
33768 }
33769 }
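/* Editorial note (not part of the original source): because
   add_builtin_function above also receives d->name + strlen
   ("__builtin_"), every TM builtin is reachable under two names.  For
   instance __builtin__ITM_WM64 and plain _ITM_WM64 resolve to the
   same decl created in this loop.  */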
33771 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33772 not in the current target ISA, so that the user can compile particular
33773 modules with target-specific options that differ from the command-line
33774 options. */
33775 static void
33776 ix86_init_mmx_sse_builtins (void)
33777 {
33778 const struct builtin_description * d;
33779 enum ix86_builtin_func_type ftype;
33780 size_t i;
33782 /* Add all special builtins with variable number of operands. */
33783 for (i = 0, d = bdesc_special_args;
33784 i < ARRAY_SIZE (bdesc_special_args);
33785 i++, d++)
33786 {
33787 if (d->name == 0)
33788 continue;
33790 ftype = (enum ix86_builtin_func_type) d->flag;
33791 def_builtin (d->mask, d->name, ftype, d->code);
33792 }
33794 /* Add all builtins with variable number of operands. */
33795 for (i = 0, d = bdesc_args;
33796 i < ARRAY_SIZE (bdesc_args);
33797 i++, d++)
33798 {
33799 if (d->name == 0)
33800 continue;
33802 ftype = (enum ix86_builtin_func_type) d->flag;
33803 def_builtin_const (d->mask, d->name, ftype, d->code);
33804 }
33806 /* Add all builtins with rounding. */
33807 for (i = 0, d = bdesc_round_args;
33808 i < ARRAY_SIZE (bdesc_round_args);
33809 i++, d++)
33810 {
33811 if (d->name == 0)
33812 continue;
33814 ftype = (enum ix86_builtin_func_type) d->flag;
33815 def_builtin_const (d->mask, d->name, ftype, d->code);
33816 }
33818 /* pcmpestr[im] insns. */
33819 for (i = 0, d = bdesc_pcmpestr;
33820 i < ARRAY_SIZE (bdesc_pcmpestr);
33821 i++, d++)
33822 {
33823 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33824 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33825 else
33826 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33827 def_builtin_const (d->mask, d->name, ftype, d->code);
33828 }
33830 /* pcmpistr[im] insns. */
33831 for (i = 0, d = bdesc_pcmpistr;
33832 i < ARRAY_SIZE (bdesc_pcmpistr);
33833 i++, d++)
33834 {
33835 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33836 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33837 else
33838 ftype = INT_FTYPE_V16QI_V16QI_INT;
33839 def_builtin_const (d->mask, d->name, ftype, d->code);
33840 }
33842 /* comi/ucomi insns. */
33843 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33844 {
33845 if (d->mask == OPTION_MASK_ISA_SSE2)
33846 ftype = INT_FTYPE_V2DF_V2DF;
33847 else
33848 ftype = INT_FTYPE_V4SF_V4SF;
33849 def_builtin_const (d->mask, d->name, ftype, d->code);
33850 }
33852 /* SSE */
33853 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33854 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33855 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33856 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33858 /* SSE or 3DNow!A */
33859 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33860 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33861 IX86_BUILTIN_MASKMOVQ);
33863 /* SSE2 */
33864 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33865 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33867 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33868 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33869 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33870 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33872 /* SSE3. */
33873 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33874 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33875 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33876 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
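/* Editorial note (not part of the original source): a hedged usage
   sketch for the two SSE3 builtins just defined, mirroring the
   MONITOR/MWAIT instruction pair:

       __builtin_ia32_monitor (addr, 0, 0);  arm monitoring of addr
       __builtin_ia32_mwait (0, 0);          sleep until it is written  */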
33878 /* AES */
33879 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33880 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33881 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33882 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33883 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33884 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33885 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33886 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33887 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33888 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33889 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33890 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33892 /* PCLMUL */
33893 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33894 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33896 /* RDRND */
33897 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33898 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33899 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33900 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33901 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33902 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33903 IX86_BUILTIN_RDRAND64_STEP);
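/* Editorial note (not part of the original source): the _step builtins
   return nonzero on success and store the random value through the
   pointer argument, mirroring RDRAND's carry flag.  A minimal sketch:

       unsigned int r;
       while (!__builtin_ia32_rdrand32_step (&r))
         ;  retry until the DRNG delivers a value  */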
33905 /* AVX2 */
33906 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33907 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33908 IX86_BUILTIN_GATHERSIV2DF);
33910 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33911 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33912 IX86_BUILTIN_GATHERSIV4DF);
33914 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33915 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33916 IX86_BUILTIN_GATHERDIV2DF);
33918 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33919 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33920 IX86_BUILTIN_GATHERDIV4DF);
33922 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33923 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33924 IX86_BUILTIN_GATHERSIV4SF);
33926 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33927 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33928 IX86_BUILTIN_GATHERSIV8SF);
33930 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33931 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33932 IX86_BUILTIN_GATHERDIV4SF);
33934 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33935 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33936 IX86_BUILTIN_GATHERDIV8SF);
33938 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33939 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33940 IX86_BUILTIN_GATHERSIV2DI);
33942 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33943 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33944 IX86_BUILTIN_GATHERSIV4DI);
33946 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33947 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33948 IX86_BUILTIN_GATHERDIV2DI);
33950 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33951 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33952 IX86_BUILTIN_GATHERDIV4DI);
33954 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33955 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33956 IX86_BUILTIN_GATHERSIV4SI);
33958 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33959 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33960 IX86_BUILTIN_GATHERSIV8SI);
33962 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33963 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33964 IX86_BUILTIN_GATHERDIV4SI);
33966 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33967 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33968 IX86_BUILTIN_GATHERDIV8SI);
33970 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33971 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33972 IX86_BUILTIN_GATHERALTSIV4DF);
33974 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33975 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33976 IX86_BUILTIN_GATHERALTDIV8SF);
33978 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33979 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33980 IX86_BUILTIN_GATHERALTSIV4DI);
33982 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33983 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33984 IX86_BUILTIN_GATHERALTDIV8SI);
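/* Editorial note (not part of the original source): the AVX2 gather
   builtins take (src, base, index vector, mask, scale) as the
   signatures above show, and back the i32gather/i64gather intrinsics
   in avx2intrin.h.  A hedged example:

       __m256d v = _mm256_i32gather_pd (ptr, idx, 8);

   is commonly implemented via __builtin_ia32_gathersiv4df with an
   all-ones mask.  */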
33986 /* AVX512F */
33987 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33988 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33989 IX86_BUILTIN_GATHER3SIV16SF);
33991 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33992 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33993 IX86_BUILTIN_GATHER3SIV8DF);
33995 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33996 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33997 IX86_BUILTIN_GATHER3DIV16SF);
33999 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
34000 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
34001 IX86_BUILTIN_GATHER3DIV8DF);
34003 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
34004 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
34005 IX86_BUILTIN_GATHER3SIV16SI);
34007 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
34008 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
34009 IX86_BUILTIN_GATHER3SIV8DI);
34011 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
34012 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
34013 IX86_BUILTIN_GATHER3DIV16SI);
34015 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
34016 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
34017 IX86_BUILTIN_GATHER3DIV8DI);
34019 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
34020 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
34021 IX86_BUILTIN_GATHER3ALTSIV8DF);
34023 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
34024 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
34025 IX86_BUILTIN_GATHER3ALTDIV16SF);
34027 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
34028 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
34029 IX86_BUILTIN_GATHER3ALTSIV8DI);
34031 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
34032 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
34033 IX86_BUILTIN_GATHER3ALTDIV16SI);
34035 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
34036 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
34037 IX86_BUILTIN_SCATTERSIV16SF);
34039 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
34040 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
34041 IX86_BUILTIN_SCATTERSIV8DF);
34043 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
34044 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
34045 IX86_BUILTIN_SCATTERDIV16SF);
34047 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
34048 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
34049 IX86_BUILTIN_SCATTERDIV8DF);
34051 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
34052 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
34053 IX86_BUILTIN_SCATTERSIV16SI);
34055 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
34056 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
34057 IX86_BUILTIN_SCATTERSIV8DI);
34059 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
34060 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
34061 IX86_BUILTIN_SCATTERDIV16SI);
34063 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
34064 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
34065 IX86_BUILTIN_SCATTERDIV8DI);
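/* Editorial note (not part of the original source): the scatter
   builtins mirror the gathers with the destination first -- (base
   pointer, write mask, index vector, source vector, scale) -- as the
   VOID_FTYPE_* signatures above spell out.  */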
34067 /* AVX512VL */
34068 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
34069 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
34070 IX86_BUILTIN_GATHER3SIV2DF);
34072 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
34073 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
34074 IX86_BUILTIN_GATHER3SIV4DF);
34076 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
34077 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
34078 IX86_BUILTIN_GATHER3DIV2DF);
34080 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
34081 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
34082 IX86_BUILTIN_GATHER3DIV4DF);
34084 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
34085 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
34086 IX86_BUILTIN_GATHER3SIV4SF);
34088 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
34089 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
34090 IX86_BUILTIN_GATHER3SIV8SF);
34092 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
34093 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
34094 IX86_BUILTIN_GATHER3DIV4SF);
34096 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
34097 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
34098 IX86_BUILTIN_GATHER3DIV8SF);
34100 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
34101 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
34102 IX86_BUILTIN_GATHER3SIV2DI);
34104 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
34105 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
34106 IX86_BUILTIN_GATHER3SIV4DI);
34108 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
34109 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
34110 IX86_BUILTIN_GATHER3DIV2DI);
34112 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
34113 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
34114 IX86_BUILTIN_GATHER3DIV4DI);
34116 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
34117 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
34118 IX86_BUILTIN_GATHER3SIV4SI);
34120 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
34121 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
34122 IX86_BUILTIN_GATHER3SIV8SI);
34124 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
34125 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
34126 IX86_BUILTIN_GATHER3DIV4SI);
34128 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
34129 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
34130 IX86_BUILTIN_GATHER3DIV8SI);
34132 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
34133 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
34134 IX86_BUILTIN_GATHER3ALTSIV4DF);
34136 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
34137 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
34138 IX86_BUILTIN_GATHER3ALTDIV8SF);
34140 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
34141 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
34142 IX86_BUILTIN_GATHER3ALTSIV4DI);
34144 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
34145 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
34146 IX86_BUILTIN_GATHER3ALTDIV8SI);
34148 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34149 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34150 IX86_BUILTIN_SCATTERSIV8SF);
34152 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34153 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34154 IX86_BUILTIN_SCATTERSIV4SF);
34156 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34157 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34158 IX86_BUILTIN_SCATTERSIV4DF);
34160 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34161 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34162 IX86_BUILTIN_SCATTERSIV2DF);
34164 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34165 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34166 IX86_BUILTIN_SCATTERDIV8SF);
34168 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34169 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34170 IX86_BUILTIN_SCATTERDIV4SF);
34172 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34173 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34174 IX86_BUILTIN_SCATTERDIV4DF);
34176 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34177 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34178 IX86_BUILTIN_SCATTERDIV2DF);
34180 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34181 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34182 IX86_BUILTIN_SCATTERSIV8SI);
34184 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34185 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34186 IX86_BUILTIN_SCATTERSIV4SI);
34188 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34189 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34190 IX86_BUILTIN_SCATTERSIV4DI);
34192 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34193 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34194 IX86_BUILTIN_SCATTERSIV2DI);
34196 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34197 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34198 IX86_BUILTIN_SCATTERDIV8SI);
34200 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34201 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34202 IX86_BUILTIN_SCATTERDIV4SI);
34204 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34205 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34206 IX86_BUILTIN_SCATTERDIV4DI);
34208 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34209 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34210 IX86_BUILTIN_SCATTERDIV2DI);
34211 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
34212 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
34213 IX86_BUILTIN_SCATTERALTSIV8DF);
34215 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
34216 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
34217 IX86_BUILTIN_SCATTERALTDIV16SF);
34219 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
34220 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
34221 IX86_BUILTIN_SCATTERALTSIV8DI);
34223 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
34224 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
34225 IX86_BUILTIN_SCATTERALTDIV16SI);
34227 /* AVX512PF */
34228 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34229 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34230 IX86_BUILTIN_GATHERPFDPD);
34231 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34232 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34233 IX86_BUILTIN_GATHERPFDPS);
34234 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34235 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34236 IX86_BUILTIN_GATHERPFQPD);
34237 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34238 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34239 IX86_BUILTIN_GATHERPFQPS);
34240 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34241 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34242 IX86_BUILTIN_SCATTERPFDPD);
34243 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34244 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34245 IX86_BUILTIN_SCATTERPFDPS);
34246 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34247 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34248 IX86_BUILTIN_SCATTERPFQPD);
34249 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34250 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34251 IX86_BUILTIN_SCATTERPFQPS);
34253 /* SHA */
34254 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34255 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34256 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34257 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34258 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34259 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34260 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34261 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34262 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34263 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34264 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34265 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34266 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34267 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34269 /* RTM. */
34270 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34271 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34273 /* MMX access to the vec_init patterns. */
34274 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34275 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34277 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34278 V4HI_FTYPE_HI_HI_HI_HI,
34279 IX86_BUILTIN_VEC_INIT_V4HI);
34281 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34282 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34283 IX86_BUILTIN_VEC_INIT_V8QI);
34285 /* Access to the vec_extract patterns. */
34286 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34287 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34288 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34289 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34290 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34291 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34292 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34293 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34294 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34295 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34297 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34298 "__builtin_ia32_vec_ext_v4hi",
34299 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34301 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34302 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34304 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34305 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34307 /* Access to the vec_set patterns. */
34308 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34309 "__builtin_ia32_vec_set_v2di",
34310 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34312 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34313 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34315 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34316 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34318 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34319 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34321 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34322 "__builtin_ia32_vec_set_v4hi",
34323 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34325 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34326 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
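/* Editorial note (not part of the original source): the vec_init,
   vec_ext and vec_set builtins above are mostly consumed by the
   intrinsic headers rather than called directly.  A hedged example:

       float f = __builtin_ia32_vec_ext_v4sf (v, 0);

   extracts element 0 of a V4SF, which is how _mm_cvtss_f32 is
   typically implemented in xmmintrin.h.  */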
34328 /* RDSEED */
34329 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34330 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34331 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34332 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34333 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34334 "__builtin_ia32_rdseed_di_step",
34335 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34337 /* ADCX */
34338 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34339 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34340 def_builtin (OPTION_MASK_ISA_64BIT,
34341 "__builtin_ia32_addcarryx_u64",
34342 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34343 IX86_BUILTIN_ADDCARRYX64);
34345 /* SBB */
34346 def_builtin (0, "__builtin_ia32_sbb_u32",
34347 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34348 def_builtin (OPTION_MASK_ISA_64BIT,
34349 "__builtin_ia32_sbb_u64",
34350 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34351 IX86_BUILTIN_SBB64);
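/* Editorial note (not part of the original source): both carry
   builtins take (carry-in, op1, op2, out pointer) and return the
   carry-out, so multi-word arithmetic chains naturally.  A minimal
   sketch of a 64-bit add built from 32-bit halves:

       unsigned int lo, hi;
       unsigned char c;
       c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
       c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);  */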
34353 /* Read/write FLAGS. */
34354 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34355 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34356 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34357 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34358 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34359 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34360 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34361 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
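/* Editorial note (not part of the original source): a hedged usage
   sketch for the EFLAGS builtins just defined (32-bit forms shown;
   the _u64 forms are their 64-bit-only counterparts):

       unsigned int flags = __builtin_ia32_readeflags_u32 ();
       __builtin_ia32_writeeflags_u32 (flags);  round-trips EFLAGS  */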
34363 /* CLFLUSHOPT. */
34364 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34365 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34367 /* CLWB. */
34368 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34369 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34371 /* MONITORX and MWAITX. */
34372 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34373 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34374 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34375 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
34377 /* Add FMA4 multi-arg instructions.  */
34378 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34380 if (d->name == 0)
34381 continue;
34383 ftype = (enum ix86_builtin_func_type) d->flag;
34384 def_builtin_const (d->mask, d->name, ftype, d->code);
34388 static void
34389 ix86_init_mpx_builtins ()
34391 const struct builtin_description * d;
34392 enum ix86_builtin_func_type ftype;
34393 tree decl;
34394 size_t i;
34396 for (i = 0, d = bdesc_mpx;
34397 i < ARRAY_SIZE (bdesc_mpx);
34398 i++, d++)
34400 if (d->name == 0)
34401 continue;
34403 ftype = (enum ix86_builtin_func_type) d->flag;
34404 decl = def_builtin (d->mask, d->name, ftype, d->code);
34406 /* Without the leaf and nothrow flags, abnormal edges may
34407 follow calls to MPX builtins when setjmp is present
34408 in the function.  Since there may be many MPX builtin
34409 calls, this creates lots of useless edges and enormous
34410 PHI nodes.  To avoid this, mark MPX builtins as leaf
34411 and nothrow. */
34412 if (decl)
34414 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34415 NULL_TREE);
34416 TREE_NOTHROW (decl) = 1;
34418 else
34420 ix86_builtins_isa[(int)d->code].leaf_p = true;
34421 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34425 for (i = 0, d = bdesc_mpx_const;
34426 i < ARRAY_SIZE (bdesc_mpx_const);
34427 i++, d++)
34429 if (d->name == 0)
34430 continue;
34432 ftype = (enum ix86_builtin_func_type) d->flag;
34433 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34435 if (decl)
34437 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34438 NULL_TREE);
34439 TREE_NOTHROW (decl) = 1;
34441 else
34443 ix86_builtins_isa[(int)d->code].leaf_p = true;
34444 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34449 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34450 to return a pointer to VERSION_DECL if the outcome of the expression
34451 formed by PREDICATE_CHAIN is true. This function will be called during
34452 version dispatch to decide which function version to execute. It returns
34453 the basic block at the end, to which more conditions can be added. */
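/* A sketch of the GIMPLE this grows in the resolver for one version
   (all names invented for illustration):

       cond_1 = __builtin_cpu_is ("haswell");
       cond_2 = __builtin_cpu_supports ("avx2");
       and_1 = MIN_EXPR <cond_2, cond_1>;
       if (and_1 > 0)
         return (void *) &foo.arch_haswell_avx2;
       ... the next version's condition is appended here ...  */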
34455 static basic_block
34456 add_condition_to_bb (tree function_decl, tree version_decl,
34457 tree predicate_chain, basic_block new_bb)
34459 gimple return_stmt;
34460 tree convert_expr, result_var;
34461 gimple convert_stmt;
34462 gimple call_cond_stmt;
34463 gimple if_else_stmt;
34465 basic_block bb1, bb2, bb3;
34466 edge e12, e23;
34468 tree cond_var, and_expr_var = NULL_TREE;
34469 gimple_seq gseq;
34471 tree predicate_decl, predicate_arg;
34473 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34475 gcc_assert (new_bb != NULL);
34476 gseq = bb_seq (new_bb);
34479 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34480 build_fold_addr_expr (version_decl));
34481 result_var = create_tmp_var (ptr_type_node);
34482 convert_stmt = gimple_build_assign (result_var, convert_expr);
34483 return_stmt = gimple_build_return (result_var);
34485 if (predicate_chain == NULL_TREE)
34487 gimple_seq_add_stmt (&gseq, convert_stmt);
34488 gimple_seq_add_stmt (&gseq, return_stmt);
34489 set_bb_seq (new_bb, gseq);
34490 gimple_set_bb (convert_stmt, new_bb);
34491 gimple_set_bb (return_stmt, new_bb);
34492 pop_cfun ();
34493 return new_bb;
34496 while (predicate_chain != NULL)
34498 cond_var = create_tmp_var (integer_type_node);
34499 predicate_decl = TREE_PURPOSE (predicate_chain);
34500 predicate_arg = TREE_VALUE (predicate_chain);
34501 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34502 gimple_call_set_lhs (call_cond_stmt, cond_var);
34504 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34505 gimple_set_bb (call_cond_stmt, new_bb);
34506 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34508 predicate_chain = TREE_CHAIN (predicate_chain);
34510 if (and_expr_var == NULL)
34511 and_expr_var = cond_var;
34512 else
34514 gimple assign_stmt;
34515 /* Use MIN_EXPR as a logical AND: and_expr_var = min_expr <cond_var,
34516 and_expr_var> is nonzero only if every predicate result is nonzero. */
34517 assign_stmt = gimple_build_assign (and_expr_var,
34518 build2 (MIN_EXPR, integer_type_node,
34519 cond_var, and_expr_var));
34521 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34522 gimple_set_bb (assign_stmt, new_bb);
34523 gimple_seq_add_stmt (&gseq, assign_stmt);
34527 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34528 integer_zero_node,
34529 NULL_TREE, NULL_TREE);
34530 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34531 gimple_set_bb (if_else_stmt, new_bb);
34532 gimple_seq_add_stmt (&gseq, if_else_stmt);
34534 gimple_seq_add_stmt (&gseq, convert_stmt);
34535 gimple_seq_add_stmt (&gseq, return_stmt);
34536 set_bb_seq (new_bb, gseq);
34538 bb1 = new_bb;
34539 e12 = split_block (bb1, if_else_stmt);
34540 bb2 = e12->dest;
34541 e12->flags &= ~EDGE_FALLTHRU;
34542 e12->flags |= EDGE_TRUE_VALUE;
34544 e23 = split_block (bb2, return_stmt);
34546 gimple_set_bb (convert_stmt, bb2);
34547 gimple_set_bb (return_stmt, bb2);
34549 bb3 = e23->dest;
34550 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34552 remove_edge (e23);
34553 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34555 pop_cfun ();
34557 return bb3;
34560 /* This parses the attribute arguments to target in DECL and determines
34561 the right builtin to use to match the platform specification.
34562 It returns the priority value for this version decl. If PREDICATE_LIST
34563 is not NULL, it stores the list of cpu features that need to be checked
34564 before dispatching this function. */
34566 static unsigned int
34567 get_builtin_code_for_version (tree decl, tree *predicate_list)
34569 tree attrs;
34570 struct cl_target_option cur_target;
34571 tree target_node;
34572 struct cl_target_option *new_target;
34573 const char *arg_str = NULL;
34574 const char *attrs_str = NULL;
34575 char *tok_str = NULL;
34576 char *token;
34578 /* Priority of i386 features, greater value is higher priority. This is
34579 used to decide the order in which function dispatch must happen. For
34580 instance, a version specialized for SSE4.2 should be checked for dispatch
34581 before a version for SSE3, as SSE4.2 implies SSE3. */
34582 enum feature_priority
34584 P_ZERO = 0,
34585 P_MMX,
34586 P_SSE,
34587 P_SSE2,
34588 P_SSE3,
34589 P_SSSE3,
34590 P_PROC_SSSE3,
34591 P_SSE4_A,
34592 P_PROC_SSE4_A,
34593 P_SSE4_1,
34594 P_SSE4_2,
34595 P_PROC_SSE4_2,
34596 P_POPCNT,
34597 P_AES,
34598 P_PCLMUL,
34599 P_AVX,
34600 P_PROC_AVX,
34601 P_BMI,
34602 P_PROC_BMI,
34603 P_FMA4,
34604 P_XOP,
34605 P_PROC_XOP,
34606 P_FMA,
34607 P_PROC_FMA,
34608 P_BMI2,
34609 P_AVX2,
34610 P_PROC_AVX2,
34611 P_AVX512F,
34612 P_PROC_AVX512F
34615 enum feature_priority priority = P_ZERO;
34617 /* These are the target attribute strings for which a dispatcher is
34618 available, from fold_builtin_cpu. */
34620 static struct _feature_list
34622 const char *const name;
34623 const enum feature_priority priority;
34625 const feature_list[] =
34627 {"mmx", P_MMX},
34628 {"sse", P_SSE},
34629 {"sse2", P_SSE2},
34630 {"sse3", P_SSE3},
34631 {"sse4a", P_SSE4_A},
34632 {"ssse3", P_SSSE3},
34633 {"sse4.1", P_SSE4_1},
34634 {"sse4.2", P_SSE4_2},
34635 {"popcnt", P_POPCNT},
34636 {"aes", P_AES},
34637 {"pclmul", P_PCLMUL},
34638 {"avx", P_AVX},
34639 {"bmi", P_BMI},
34640 {"fma4", P_FMA4},
34641 {"xop", P_XOP},
34642 {"fma", P_FMA},
34643 {"bmi2", P_BMI2},
34644 {"avx2", P_AVX2},
34645 {"avx512f", P_AVX512F}
34649 static unsigned int NUM_FEATURES
34650 = sizeof (feature_list) / sizeof (struct _feature_list);
34652 unsigned int i;
34654 tree predicate_chain = NULL_TREE;
34655 tree predicate_decl, predicate_arg;
34657 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34658 gcc_assert (attrs != NULL);
34660 attrs = TREE_VALUE (TREE_VALUE (attrs));
34662 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34663 attrs_str = TREE_STRING_POINTER (attrs);
34665 /* Return priority zero for the default function.  */
34666 if (strcmp (attrs_str, "default") == 0)
34667 return 0;
34669 /* Handle arch= if specified. For priority, set it to be 1 more than
34670 the best instruction set the processor can handle. For instance, if
34671 there is a version for atom and a version for ssse3 (the highest ISA
34672 priority for atom), the atom version must be checked for dispatch
34673 before the ssse3 version. */
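/* For example (illustrative), given

       __attribute__ ((target ("arch=atom"))) int foo (void);
       __attribute__ ((target ("ssse3"))) int foo (void);

   the arch=atom version gets priority P_PROC_SSSE3, one step above
   P_SSSE3, so the dispatcher tests it first.  */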
34674 if (strstr (attrs_str, "arch=") != NULL)
34676 cl_target_option_save (&cur_target, &global_options);
34677 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34678 &global_options_set);
34680 gcc_assert (target_node);
34681 new_target = TREE_TARGET_OPTION (target_node);
34682 gcc_assert (new_target);
34684 if (new_target->arch_specified && new_target->arch > 0)
34686 switch (new_target->arch)
34688 case PROCESSOR_CORE2:
34689 arg_str = "core2";
34690 priority = P_PROC_SSSE3;
34691 break;
34692 case PROCESSOR_NEHALEM:
34693 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34694 arg_str = "westmere";
34695 else
34696 /* We translate "arch=corei7" and "arch=nehalem" to
34697 "corei7" so that they map to the M_INTEL_COREI7
34698 cpu type, covering all M_INTEL_COREI7_XXX subtypes. */
34699 arg_str = "corei7";
34700 priority = P_PROC_SSE4_2;
34701 break;
34702 case PROCESSOR_SANDYBRIDGE:
34703 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34704 arg_str = "ivybridge";
34705 else
34706 arg_str = "sandybridge";
34707 priority = P_PROC_AVX;
34708 break;
34709 case PROCESSOR_HASWELL:
34710 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34711 arg_str = "broadwell";
34712 else
34713 arg_str = "haswell";
34714 priority = P_PROC_AVX2;
34715 break;
34716 case PROCESSOR_BONNELL:
34717 arg_str = "bonnell";
34718 priority = P_PROC_SSSE3;
34719 break;
34720 case PROCESSOR_KNL:
34721 arg_str = "knl";
34722 priority = P_PROC_AVX512F;
34723 break;
34724 case PROCESSOR_SILVERMONT:
34725 arg_str = "silvermont";
34726 priority = P_PROC_SSE4_2;
34727 break;
34728 case PROCESSOR_AMDFAM10:
34729 arg_str = "amdfam10h";
34730 priority = P_PROC_SSE4_A;
34731 break;
34732 case PROCESSOR_BTVER1:
34733 arg_str = "btver1";
34734 priority = P_PROC_SSE4_A;
34735 break;
34736 case PROCESSOR_BTVER2:
34737 arg_str = "btver2";
34738 priority = P_PROC_BMI;
34739 break;
34740 case PROCESSOR_BDVER1:
34741 arg_str = "bdver1";
34742 priority = P_PROC_XOP;
34743 break;
34744 case PROCESSOR_BDVER2:
34745 arg_str = "bdver2";
34746 priority = P_PROC_FMA;
34747 break;
34748 case PROCESSOR_BDVER3:
34749 arg_str = "bdver3";
34750 priority = P_PROC_FMA;
34751 break;
34752 case PROCESSOR_BDVER4:
34753 arg_str = "bdver4";
34754 priority = P_PROC_AVX2;
34755 break;
34759 cl_target_option_restore (&global_options, &cur_target);
34761 if (predicate_list && arg_str == NULL)
34763 error_at (DECL_SOURCE_LOCATION (decl),
34764 "No dispatcher found for the versioning attributes");
34765 return 0;
34768 if (predicate_list)
34770 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34771 /* For a C string literal the length includes the trailing NULL. */
34772 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34773 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34774 predicate_chain);
34778 /* Process feature name. */
34779 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34780 strcpy (tok_str, attrs_str);
34781 token = strtok (tok_str, ",");
34782 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34784 while (token != NULL)
34786 /* Do not process "arch=" */
34787 if (strncmp (token, "arch=", 5) == 0)
34789 token = strtok (NULL, ",");
34790 continue;
34792 for (i = 0; i < NUM_FEATURES; ++i)
34794 if (strcmp (token, feature_list[i].name) == 0)
34796 if (predicate_list)
34798 predicate_arg = build_string_literal (
34799 strlen (feature_list[i].name) + 1,
34800 feature_list[i].name);
34801 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34802 predicate_chain);
34804 /* Find the maximum priority feature. */
34805 if (feature_list[i].priority > priority)
34806 priority = feature_list[i].priority;
34808 break;
34811 if (predicate_list && i == NUM_FEATURES)
34813 error_at (DECL_SOURCE_LOCATION (decl),
34814 "No dispatcher found for %s", token);
34815 return 0;
34817 token = strtok (NULL, ",");
34819 free (tok_str);
34821 if (predicate_list && predicate_chain == NULL_TREE)
34823 error_at (DECL_SOURCE_LOCATION (decl),
34824 "No dispatcher found for the versioning attributes : %s",
34825 attrs_str);
34826 return 0;
34828 else if (predicate_list)
34830 predicate_chain = nreverse (predicate_chain);
34831 *predicate_list = predicate_chain;
34834 return priority;
34837 /* This compares the priority of target features in function DECL1
34838 and DECL2. It returns positive value if DECL1 is higher priority,
34839 negative value if DECL2 is higher priority and 0 if they are the
34840 same. */
34842 static int
34843 ix86_compare_version_priority (tree decl1, tree decl2)
34845 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34846 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34848 return (int)priority1 - (int)priority2;
34851 /* V1 and V2 point to function versions with different priorities
34852 based on the target ISA; compare so qsort orders by descending priority. */
34854 static int
34855 feature_compare (const void *v1, const void *v2)
34857 typedef struct _function_version_info
34859 tree version_decl;
34860 tree predicate_chain;
34861 unsigned int dispatch_priority;
34862 } function_version_info;
34864 const function_version_info c1 = *(const function_version_info *)v1;
34865 const function_version_info c2 = *(const function_version_info *)v2;
34866 return (c2.dispatch_priority - c1.dispatch_priority);
34869 /* This function generates the dispatch function for
34870 multi-versioned functions. DISPATCH_DECL is the function which will
34871 contain the dispatch logic. FNDECLS are the function choices for
34872 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34873 in DISPATCH_DECL in which the dispatch code is generated. */
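/* For instance (illustrative), given

       __attribute__ ((target ("default"))) int foo (void);
       __attribute__ ((target ("sse4.2"))) int foo (void);
       __attribute__ ((target ("avx2"))) int foo (void);

   the generated resolver checks __builtin_cpu_supports ("avx2")
   first, then "sse4.2", and finally falls through to the default
   version.  */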
34875 static int
34876 dispatch_function_versions (tree dispatch_decl,
34877 void *fndecls_p,
34878 basic_block *empty_bb)
34880 tree default_decl;
34881 gimple ifunc_cpu_init_stmt;
34882 gimple_seq gseq;
34883 int ix;
34884 tree ele;
34885 vec<tree> *fndecls;
34886 unsigned int num_versions = 0;
34887 unsigned int actual_versions = 0;
34888 unsigned int i;
34890 struct _function_version_info
34892 tree version_decl;
34893 tree predicate_chain;
34894 unsigned int dispatch_priority;
34895 }*function_version_info;
34897 gcc_assert (dispatch_decl != NULL
34898 && fndecls_p != NULL
34899 && empty_bb != NULL);
34901 /* fndecls_p is actually a vector.  */
34902 fndecls = static_cast<vec<tree> *> (fndecls_p);
34904 /* There must be at least one version other than the default. */
34905 num_versions = fndecls->length ();
34906 gcc_assert (num_versions >= 2);
34908 function_version_info = (struct _function_version_info *)
34909 XNEWVEC (struct _function_version_info, (num_versions - 1));
34911 /* The first version in the vector is the default decl. */
34912 default_decl = (*fndecls)[0];
34914 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34916 gseq = bb_seq (*empty_bb);
34917 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34918 constructors, so explicitly call __builtin_cpu_init here.  */
34919 ifunc_cpu_init_stmt = gimple_build_call_vec (
34920 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34921 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34922 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34923 set_bb_seq (*empty_bb, gseq);
34925 pop_cfun ();
34928 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34930 tree version_decl = ele;
34931 tree predicate_chain = NULL_TREE;
34932 unsigned int priority;
34933 /* Get attribute string, parse it and find the right predicate decl.
34934 The predicate function could be a lengthy combination of many
34935 features, like arch-type and various isa-variants. */
34936 priority = get_builtin_code_for_version (version_decl,
34937 &predicate_chain);
34939 if (predicate_chain == NULL_TREE)
34940 continue;
34942 function_version_info [actual_versions].version_decl = version_decl;
34943 function_version_info [actual_versions].predicate_chain
34944 = predicate_chain;
34945 function_version_info [actual_versions].dispatch_priority = priority;
34946 actual_versions++;
34949 /* Sort the versions according to descending order of dispatch priority. The
34950 priority is based on the ISA. This is not a perfect solution. There
34951 could still be ambiguity. If more than one function version is suitable
34952 to execute, which one should be dispatched? In future, allow the user
34953 to specify a dispatch priority next to the version. */
34954 qsort (function_version_info, actual_versions,
34955 sizeof (struct _function_version_info), feature_compare);
34957 for (i = 0; i < actual_versions; ++i)
34958 *empty_bb = add_condition_to_bb (dispatch_decl,
34959 function_version_info[i].version_decl,
34960 function_version_info[i].predicate_chain,
34961 *empty_bb);
34963 /* Dispatch the default version at the end.  */
34964 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34965 NULL, *empty_bb);
34967 free (function_version_info);
34968 return 0;
34971 /* Comparator for qsort to sort the attribute specification
34972 strings of the "target" attribute. */
34974 static int
34975 attr_strcmp (const void *v1, const void *v2)
34977 const char *c1 = *(char *const*)v1;
34978 const char *c2 = *(char *const*)v2;
34979 return strcmp (c1, c2);
34982 /* ARGLIST is the argument to the target attribute.  This function tokenizes
34983 the comma separated arguments, sorts them and returns a string which
34984 is a unique identifier for the comma separated arguments. It also
34985 replaces non-identifier characters "=,-" with "_". */
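/* E.g. (illustrative) the argument list of
   target ("avx,arch=core2,sse4.2") becomes "arch_core2_avx_sse4.2":
   '=' is rewritten to '_', the tokens are sorted with strcmp, and
   they are rejoined with '_'.  */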
34987 static char *
34988 sorted_attr_string (tree arglist)
34990 tree arg;
34991 size_t str_len_sum = 0;
34992 char **args = NULL;
34993 char *attr_str, *ret_str;
34994 char *attr = NULL;
34995 unsigned int argnum = 1;
34996 unsigned int i;
34998 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35000 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35001 size_t len = strlen (str);
35002 str_len_sum += len + 1;
35003 if (arg != arglist)
35004 argnum++;
35005 for (i = 0; i < strlen (str); i++)
35006 if (str[i] == ',')
35007 argnum++;
35010 attr_str = XNEWVEC (char, str_len_sum);
35011 str_len_sum = 0;
35012 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35014 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35015 size_t len = strlen (str);
35016 memcpy (attr_str + str_len_sum, str, len);
35017 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
35018 str_len_sum += len + 1;
35021 /* Replace "=,-" with "_". */
35022 for (i = 0; i < strlen (attr_str); i++)
35023 if (attr_str[i] == '=' || attr_str[i]== '-')
35024 attr_str[i] = '_';
35026 if (argnum == 1)
35027 return attr_str;
35029 args = XNEWVEC (char *, argnum);
35031 i = 0;
35032 attr = strtok (attr_str, ",");
35033 while (attr != NULL)
35035 args[i] = attr;
35036 i++;
35037 attr = strtok (NULL, ",");
35040 qsort (args, argnum, sizeof (char *), attr_strcmp);
35042 ret_str = XNEWVEC (char, str_len_sum);
35043 str_len_sum = 0;
35044 for (i = 0; i < argnum; i++)
35046 size_t len = strlen (args[i]);
35047 memcpy (ret_str + str_len_sum, args[i], len);
35048 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
35049 str_len_sum += len + 1;
35052 XDELETEVEC (args);
35053 XDELETEVEC (attr_str);
35054 return ret_str;
35057 /* This function changes the assembler name for functions that are
35058 versions. If DECL is a function version and has a "target"
35059 attribute, it appends the attribute string to its assembler name. */
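/* E.g. (illustrative) a version of foo declared with
   __attribute__ ((target ("avx2"))) is assembled as "foo.avx2",
   while the "default" version keeps its original assembler name.  */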
35061 static tree
35062 ix86_mangle_function_version_assembler_name (tree decl, tree id)
35064 tree version_attr;
35065 const char *orig_name, *version_string;
35066 char *attr_str, *assembler_name;
35068 if (DECL_DECLARED_INLINE_P (decl)
35069 && lookup_attribute ("gnu_inline",
35070 DECL_ATTRIBUTES (decl)))
35071 error_at (DECL_SOURCE_LOCATION (decl),
35072 "Function versions cannot be marked as gnu_inline,"
35073 " bodies have to be generated");
35075 if (DECL_VIRTUAL_P (decl)
35076 || DECL_VINDEX (decl))
35077 sorry ("Virtual function multiversioning not supported");
35079 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35081 /* target attribute string cannot be NULL. */
35082 gcc_assert (version_attr != NULL_TREE);
35084 orig_name = IDENTIFIER_POINTER (id);
35085 version_string
35086 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
35088 if (strcmp (version_string, "default") == 0)
35089 return id;
35091 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
35092 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
35094 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
35096 /* Allow assembler name to be modified if already set. */
35097 if (DECL_ASSEMBLER_NAME_SET_P (decl))
35098 SET_DECL_RTL (decl, NULL);
35100 tree ret = get_identifier (assembler_name);
35101 XDELETEVEC (attr_str);
35102 XDELETEVEC (assembler_name);
35103 return ret;
35106 /* This function returns true if FN1 and FN2 are versions of the same function,
35107 that is, the target strings of the function decls are different. This assumes
35108 that FN1 and FN2 have the same signature. */
35110 static bool
35111 ix86_function_versions (tree fn1, tree fn2)
35113 tree attr1, attr2;
35114 char *target1, *target2;
35115 bool result;
35117 if (TREE_CODE (fn1) != FUNCTION_DECL
35118 || TREE_CODE (fn2) != FUNCTION_DECL)
35119 return false;
35121 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
35122 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
35124 /* At least one function decl should have the target attribute specified. */
35125 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
35126 return false;
35128 /* Diagnose missing target attribute if one of the decls is already
35129 multi-versioned. */
35130 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
35132 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
35134 if (attr2 != NULL_TREE)
35136 std::swap (fn1, fn2);
35137 attr1 = attr2;
35139 error_at (DECL_SOURCE_LOCATION (fn2),
35140 "missing %<target%> attribute for multi-versioned %D",
35141 fn2);
35142 inform (DECL_SOURCE_LOCATION (fn1),
35143 "previous declaration of %D", fn1);
35144 /* Prevent diagnosing of the same error multiple times. */
35145 DECL_ATTRIBUTES (fn2)
35146 = tree_cons (get_identifier ("target"),
35147 copy_node (TREE_VALUE (attr1)),
35148 DECL_ATTRIBUTES (fn2));
35150 return false;
35153 target1 = sorted_attr_string (TREE_VALUE (attr1));
35154 target2 = sorted_attr_string (TREE_VALUE (attr2));
35156 /* The sorted target strings must be different for fn1 and fn2
35157 to be versions. */
35158 if (strcmp (target1, target2) == 0)
35159 result = false;
35160 else
35161 result = true;
35163 XDELETEVEC (target1);
35164 XDELETEVEC (target2);
35166 return result;
35169 static tree
35170 ix86_mangle_decl_assembler_name (tree decl, tree id)
35172 /* For function version, add the target suffix to the assembler name. */
35173 if (TREE_CODE (decl) == FUNCTION_DECL
35174 && DECL_FUNCTION_VERSIONED (decl))
35175 id = ix86_mangle_function_version_assembler_name (decl, id);
35176 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35177 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35178 #endif
35180 return id;
35183 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35184 is true, append the full path name of the source file. */
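/* E.g. (illustrative) make_name (foo, "resolver", false) yields
   "foo.resolver", while MAKE_UNIQUE inserts a file-scope unique
   string in between, giving "foo.<unique>.resolver".  */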
35186 static char *
35187 make_name (tree decl, const char *suffix, bool make_unique)
35189 char *global_var_name;
35190 int name_len;
35191 const char *name;
35192 const char *unique_name = NULL;
35194 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35196 /* Get a unique name that can be used globally without any chances
35197 of collision at link time. */
35198 if (make_unique)
35199 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35201 name_len = strlen (name) + strlen (suffix) + 2;
35203 if (make_unique)
35204 name_len += strlen (unique_name) + 1;
35205 global_var_name = XNEWVEC (char, name_len);
35207 /* Use '.' to concatenate names as it is demangler friendly. */
35208 if (make_unique)
35209 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35210 suffix);
35211 else
35212 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35214 return global_var_name;
35217 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35219 /* Make a dispatcher declaration for the multi-versioned function DECL.
35220 Calls to DECL function will be replaced with calls to the dispatcher
35221 by the front-end. Return the decl created. */
35223 static tree
35224 make_dispatcher_decl (const tree decl)
35226 tree func_decl;
35227 char *func_name;
35228 tree fn_type, func_type;
35229 bool is_uniq = false;
35231 if (TREE_PUBLIC (decl) == 0)
35232 is_uniq = true;
35234 func_name = make_name (decl, "ifunc", is_uniq);
35236 fn_type = TREE_TYPE (decl);
35237 func_type = build_function_type (TREE_TYPE (fn_type),
35238 TYPE_ARG_TYPES (fn_type));
35240 func_decl = build_fn_decl (func_name, func_type);
35241 XDELETEVEC (func_name);
35242 TREE_USED (func_decl) = 1;
35243 DECL_CONTEXT (func_decl) = NULL_TREE;
35244 DECL_INITIAL (func_decl) = error_mark_node;
35245 DECL_ARTIFICIAL (func_decl) = 1;
35246 /* Mark this func as external, the resolver will flip it again if
35247 it gets generated. */
35248 DECL_EXTERNAL (func_decl) = 1;
35249 /* IFUNCs have to be externally visible.  */
35250 TREE_PUBLIC (func_decl) = 1;
35252 return func_decl;
35255 #endif
35257 /* Return true if DECL is multi-versioned and is the default version,
35258 that is, it is not tagged with a target-specific optimization. */
35260 static bool
35261 is_function_default_version (const tree decl)
35263 if (TREE_CODE (decl) != FUNCTION_DECL
35264 || !DECL_FUNCTION_VERSIONED (decl))
35265 return false;
35266 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35267 gcc_assert (attr);
35268 attr = TREE_VALUE (TREE_VALUE (attr));
35269 return (TREE_CODE (attr) == STRING_CST
35270 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35273 /* Make a dispatcher declaration for the multi-versioned function DECL.
35274 Calls to DECL function will be replaced with calls to the dispatcher
35275 by the front-end. Returns the decl of the dispatcher function. */
35277 static tree
35278 ix86_get_function_versions_dispatcher (void *decl)
35280 tree fn = (tree) decl;
35281 struct cgraph_node *node = NULL;
35282 struct cgraph_node *default_node = NULL;
35283 struct cgraph_function_version_info *node_v = NULL;
35284 struct cgraph_function_version_info *first_v = NULL;
35286 tree dispatch_decl = NULL;
35288 struct cgraph_function_version_info *default_version_info = NULL;
35290 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35292 node = cgraph_node::get (fn);
35293 gcc_assert (node != NULL);
35295 node_v = node->function_version ();
35296 gcc_assert (node_v != NULL);
35298 if (node_v->dispatcher_resolver != NULL)
35299 return node_v->dispatcher_resolver;
35301 /* Find the default version and make it the first node. */
35302 first_v = node_v;
35303 /* Go to the beginning of the chain. */
35304 while (first_v->prev != NULL)
35305 first_v = first_v->prev;
35306 default_version_info = first_v;
35307 while (default_version_info != NULL)
35309 if (is_function_default_version
35310 (default_version_info->this_node->decl))
35311 break;
35312 default_version_info = default_version_info->next;
35315 /* If there is no default node, just return NULL. */
35316 if (default_version_info == NULL)
35317 return NULL;
35319 /* Make default info the first node. */
35320 if (first_v != default_version_info)
35322 default_version_info->prev->next = default_version_info->next;
35323 if (default_version_info->next)
35324 default_version_info->next->prev = default_version_info->prev;
35325 first_v->prev = default_version_info;
35326 default_version_info->next = first_v;
35327 default_version_info->prev = NULL;
35330 default_node = default_version_info->this_node;
35332 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35333 if (targetm.has_ifunc_p ())
35335 struct cgraph_function_version_info *it_v = NULL;
35336 struct cgraph_node *dispatcher_node = NULL;
35337 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35339 /* Right now, the dispatching is done via ifunc. */
35340 dispatch_decl = make_dispatcher_decl (default_node->decl);
35342 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35343 gcc_assert (dispatcher_node != NULL);
35344 dispatcher_node->dispatcher_function = 1;
35345 dispatcher_version_info
35346 = dispatcher_node->insert_new_function_version ();
35347 dispatcher_version_info->next = default_version_info;
35348 dispatcher_node->definition = 1;
35350 /* Set the dispatcher for all the versions. */
35351 it_v = default_version_info;
35352 while (it_v != NULL)
35354 it_v->dispatcher_resolver = dispatch_decl;
35355 it_v = it_v->next;
35358 else
35359 #endif
35361 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35362 "multiversioning needs ifunc which is not supported "
35363 "on this target");
35366 return dispatch_decl;
35369 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35370 it to CHAIN. */
35372 static tree
35373 make_attribute (const char *name, const char *arg_name, tree chain)
35375 tree attr_name;
35376 tree attr_arg_name;
35377 tree attr_args;
35378 tree attr;
35380 attr_name = get_identifier (name);
35381 attr_arg_name = build_string (strlen (arg_name), arg_name);
35382 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35383 attr = tree_cons (attr_name, attr_args, chain);
35384 return attr;
35387 /* Make the resolver function decl to dispatch the versions of
35388 a multi-versioned function, DEFAULT_DECL. Create an
35389 empty basic block in the resolver and store the pointer in
35390 EMPTY_BB. Return the decl of the resolver function. */
35392 static tree
35393 make_resolver_func (const tree default_decl,
35394 const tree dispatch_decl,
35395 basic_block *empty_bb)
35397 char *resolver_name;
35398 tree decl, type, decl_name, t;
35399 bool is_uniq = false;
35401 /* IFUNC's have to be globally visible. So, if the default_decl is
35402 not, then the name of the IFUNC should be made unique. */
35403 if (TREE_PUBLIC (default_decl) == 0)
35404 is_uniq = true;
35406 /* Append the filename to the resolver function name if the versions
35407 are not externally visible, since the resolver itself has to be
35408 externally visible for the loader to find it.  Appending the
35409 filename prevents conflicts with a resolver of the same name
35410 from another module. */
35411 resolver_name = make_name (default_decl, "resolver", is_uniq);
35413 /* The resolver function should return a (void *). */
35414 type = build_function_type_list (ptr_type_node, NULL_TREE);
35416 decl = build_fn_decl (resolver_name, type);
35417 decl_name = get_identifier (resolver_name);
35418 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35420 DECL_NAME (decl) = decl_name;
35421 TREE_USED (decl) = 1;
35422 DECL_ARTIFICIAL (decl) = 1;
35423 DECL_IGNORED_P (decl) = 0;
35424 /* IFUNC resolvers have to be externally visible. */
35425 TREE_PUBLIC (decl) = 1;
35426 DECL_UNINLINABLE (decl) = 1;
35428 /* Resolver is not external, body is generated. */
35429 DECL_EXTERNAL (decl) = 0;
35430 DECL_EXTERNAL (dispatch_decl) = 0;
35432 DECL_CONTEXT (decl) = NULL_TREE;
35433 DECL_INITIAL (decl) = make_node (BLOCK);
35434 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35436 if (DECL_COMDAT_GROUP (default_decl)
35437 || TREE_PUBLIC (default_decl))
35439 /* In this case, each translation unit with a call to this
35440 versioned function will put out a resolver. Ensure it
35441 is comdat to keep just one copy. */
35442 DECL_COMDAT (decl) = 1;
35443 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35445 /* Build result decl and add to function_decl. */
35446 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35447 DECL_ARTIFICIAL (t) = 1;
35448 DECL_IGNORED_P (t) = 1;
35449 DECL_RESULT (decl) = t;
35451 gimplify_function_tree (decl);
35452 push_cfun (DECL_STRUCT_FUNCTION (decl));
35453 *empty_bb = init_lowered_empty_function (decl, false, 0);
35455 cgraph_node::add_new_function (decl, true);
35456 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35458 pop_cfun ();
35460 gcc_assert (dispatch_decl != NULL);
35461 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35462 DECL_ATTRIBUTES (dispatch_decl)
35463 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35465 /* Create the alias for dispatch to resolver here. */
35466 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35467 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35468 XDELETEVEC (resolver_name);
35469 return decl;
35472 /* Generate the dispatching code body to dispatch multi-versioned function
35473 DECL. The target hook is called to process the "target" attributes and
35474 provide the code to dispatch the right function at run-time. NODE points
35475 to the dispatcher decl whose body will be created. */
35477 static tree
35478 ix86_generate_version_dispatcher_body (void *node_p)
35480 tree resolver_decl;
35481 basic_block empty_bb;
35482 tree default_ver_decl;
35483 struct cgraph_node *versn;
35484 struct cgraph_node *node;
35486 struct cgraph_function_version_info *node_version_info = NULL;
35487 struct cgraph_function_version_info *versn_info = NULL;
35489 node = (cgraph_node *)node_p;
35491 node_version_info = node->function_version ();
35492 gcc_assert (node->dispatcher_function
35493 && node_version_info != NULL);
35495 if (node_version_info->dispatcher_resolver)
35496 return node_version_info->dispatcher_resolver;
35498 /* The first version in the chain corresponds to the default version. */
35499 default_ver_decl = node_version_info->next->this_node->decl;
35501 /* node is going to be an alias, so remove the finalized bit. */
35502 node->definition = false;
35504 resolver_decl = make_resolver_func (default_ver_decl,
35505 node->decl, &empty_bb);
35507 node_version_info->dispatcher_resolver = resolver_decl;
35509 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35511 auto_vec<tree, 2> fn_ver_vec;
35513 for (versn_info = node_version_info->next; versn_info;
35514 versn_info = versn_info->next)
35516 versn = versn_info->this_node;
35517 /* Check for virtual functions here again, as by this time it should
35518 have been determined if this function needs a vtable index or
35519 not. This happens for methods in derived classes that override
35520 virtual methods in base classes but are not explicitly marked as
35521 virtual. */
35522 if (DECL_VINDEX (versn->decl))
35523 sorry ("Virtual function multiversioning not supported");
35525 fn_ver_vec.safe_push (versn->decl);
35528 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35529 cgraph_edge::rebuild_edges ();
35530 pop_cfun ();
35531 return resolver_decl;
35533 /* This builds the processor_model struct type defined in
35534 libgcc/config/i386/cpuinfo.c.  */
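/* For reference, the libgcc side declares (approximately):

       struct __processor_model
       {
         unsigned int __cpu_vendor;
         unsigned int __cpu_type;
         unsigned int __cpu_subtype;
         unsigned int __cpu_features[1];
       };  */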
35536 static tree
35537 build_processor_model_struct (void)
35539 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35540 "__cpu_features"};
35541 tree field = NULL_TREE, field_chain = NULL_TREE;
35542 int i;
35543 tree type = make_node (RECORD_TYPE);
35545 /* The first 3 fields are unsigned int. */
35546 for (i = 0; i < 3; ++i)
35548 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35549 get_identifier (field_name[i]), unsigned_type_node);
35550 if (field_chain != NULL_TREE)
35551 DECL_CHAIN (field) = field_chain;
35552 field_chain = field;
35555 /* The last field is an array of unsigned integers of size one. */
35556 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35557 get_identifier (field_name[3]),
35558 build_array_type (unsigned_type_node,
35559 build_index_type (size_one_node)));
35560 if (field_chain != NULL_TREE)
35561 DECL_CHAIN (field) = field_chain;
35562 field_chain = field;
35564 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35565 return type;
35568 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */
35570 static tree
35571 make_var_decl (tree type, const char *name)
35573 tree new_decl;
35575 new_decl = build_decl (UNKNOWN_LOCATION,
35576 VAR_DECL,
35577 get_identifier(name),
35578 type);
35580 DECL_EXTERNAL (new_decl) = 1;
35581 TREE_STATIC (new_decl) = 1;
35582 TREE_PUBLIC (new_decl) = 1;
35583 DECL_INITIAL (new_decl) = 0;
35584 DECL_ARTIFICIAL (new_decl) = 0;
35585 DECL_PRESERVE_P (new_decl) = 1;
35587 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35588 assemble_variable (new_decl, 0, 0, 0);
35590 return new_decl;
35593 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35594 into a check against the __cpu_model data defined in libgcc/config/i386/cpuinfo.c.  */
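/* Illustrative folds, written as pseudo-source:

       __builtin_cpu_is ("amd")
         ==>  (int) (__cpu_model.__cpu_vendor == M_AMD)
       __builtin_cpu_supports ("avx")
         ==>  (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX))  */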
35596 static tree
35597 fold_builtin_cpu (tree fndecl, tree *args)
35599 unsigned int i;
35600 enum ix86_builtins fn_code = (enum ix86_builtins)
35601 DECL_FUNCTION_CODE (fndecl);
35602 tree param_string_cst = NULL;
35604 /* This is the order of bit-fields in __processor_features in cpuinfo.c.  */
35605 enum processor_features
35607 F_CMOV = 0,
35608 F_MMX,
35609 F_POPCNT,
35610 F_SSE,
35611 F_SSE2,
35612 F_SSE3,
35613 F_SSSE3,
35614 F_SSE4_1,
35615 F_SSE4_2,
35616 F_AVX,
35617 F_AVX2,
35618 F_SSE4_A,
35619 F_FMA4,
35620 F_XOP,
35621 F_FMA,
35622 F_AVX512F,
35623 F_BMI,
35624 F_BMI2,
35625 F_AES,
35626 F_PCLMUL,
35627 F_MAX
35630 /* These are the values for the vendor types and cpu types and subtypes
35631 in cpuinfo.c.  Cpu types and subtypes must have the corresponding
35632 start value subtracted before use. */
35633 enum processor_model
35635 M_INTEL = 1,
35636 M_AMD,
35637 M_CPU_TYPE_START,
35638 M_INTEL_BONNELL,
35639 M_INTEL_CORE2,
35640 M_INTEL_COREI7,
35641 M_AMDFAM10H,
35642 M_AMDFAM15H,
35643 M_INTEL_SILVERMONT,
35644 M_INTEL_KNL,
35645 M_AMD_BTVER1,
35646 M_AMD_BTVER2,
35647 M_CPU_SUBTYPE_START,
35648 M_INTEL_COREI7_NEHALEM,
35649 M_INTEL_COREI7_WESTMERE,
35650 M_INTEL_COREI7_SANDYBRIDGE,
35651 M_AMDFAM10H_BARCELONA,
35652 M_AMDFAM10H_SHANGHAI,
35653 M_AMDFAM10H_ISTANBUL,
35654 M_AMDFAM15H_BDVER1,
35655 M_AMDFAM15H_BDVER2,
35656 M_AMDFAM15H_BDVER3,
35657 M_AMDFAM15H_BDVER4,
35658 M_INTEL_COREI7_IVYBRIDGE,
35659 M_INTEL_COREI7_HASWELL,
35660 M_INTEL_COREI7_BROADWELL,
35661 M_INTEL_COREI7_SKYLAKE
35664 static struct _arch_names_table
35666 const char *const name;
35667 const enum processor_model model;
35669 const arch_names_table[] =
35671 {"amd", M_AMD},
35672 {"intel", M_INTEL},
35673 {"atom", M_INTEL_BONNELL},
35674 {"slm", M_INTEL_SILVERMONT},
35675 {"core2", M_INTEL_CORE2},
35676 {"corei7", M_INTEL_COREI7},
35677 {"nehalem", M_INTEL_COREI7_NEHALEM},
35678 {"westmere", M_INTEL_COREI7_WESTMERE},
35679 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35680 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35681 {"haswell", M_INTEL_COREI7_HASWELL},
35682 {"broadwell", M_INTEL_COREI7_BROADWELL},
35683 {"skylake", M_INTEL_COREI7_SKYLAKE},
35684 {"bonnell", M_INTEL_BONNELL},
35685 {"silvermont", M_INTEL_SILVERMONT},
35686 {"knl", M_INTEL_KNL},
35687 {"amdfam10h", M_AMDFAM10H},
35688 {"barcelona", M_AMDFAM10H_BARCELONA},
35689 {"shanghai", M_AMDFAM10H_SHANGHAI},
35690 {"istanbul", M_AMDFAM10H_ISTANBUL},
35691 {"btver1", M_AMD_BTVER1},
35692 {"amdfam15h", M_AMDFAM15H},
35693 {"bdver1", M_AMDFAM15H_BDVER1},
35694 {"bdver2", M_AMDFAM15H_BDVER2},
35695 {"bdver3", M_AMDFAM15H_BDVER3},
35696 {"bdver4", M_AMDFAM15H_BDVER4},
35697 {"btver2", M_AMD_BTVER2},
35700 static struct _isa_names_table
35702 const char *const name;
35703 const enum processor_features feature;
35705 const isa_names_table[] =
35707 {"cmov", F_CMOV},
35708 {"mmx", F_MMX},
35709 {"popcnt", F_POPCNT},
35710 {"sse", F_SSE},
35711 {"sse2", F_SSE2},
35712 {"sse3", F_SSE3},
35713 {"ssse3", F_SSSE3},
35714 {"sse4a", F_SSE4_A},
35715 {"sse4.1", F_SSE4_1},
35716 {"sse4.2", F_SSE4_2},
35717 {"avx", F_AVX},
35718 {"fma4", F_FMA4},
35719 {"xop", F_XOP},
35720 {"fma", F_FMA},
35721 {"avx2", F_AVX2},
35722 {"avx512f",F_AVX512F},
35723 {"bmi", F_BMI},
35724 {"bmi2", F_BMI2},
35725 {"aes", F_AES},
35726 {"pclmul", F_PCLMUL}
35729 tree __processor_model_type = build_processor_model_struct ();
35730 tree __cpu_model_var = make_var_decl (__processor_model_type,
35731 "__cpu_model");
35734 varpool_node::add (__cpu_model_var);
35736 gcc_assert ((args != NULL) && (*args != NULL));
35738 param_string_cst = *args;
35739 while (param_string_cst
35740 && TREE_CODE (param_string_cst) != STRING_CST)
35742 /* *args must be an expr that can contain other EXPRs leading to a
35743 STRING_CST. */
35744 if (!EXPR_P (param_string_cst))
35746 error ("Parameter to builtin must be a string constant or literal");
35747 return integer_zero_node;
35749 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35752 gcc_assert (param_string_cst);
35754 if (fn_code == IX86_BUILTIN_CPU_IS)
35756 tree ref;
35757 tree field;
35758 tree final;
35760 unsigned int field_val = 0;
35761 unsigned int NUM_ARCH_NAMES
35762 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35764 for (i = 0; i < NUM_ARCH_NAMES; i++)
35765 if (strcmp (arch_names_table[i].name,
35766 TREE_STRING_POINTER (param_string_cst)) == 0)
35767 break;
35769 if (i == NUM_ARCH_NAMES)
35771 error ("Parameter to builtin not valid: %s",
35772 TREE_STRING_POINTER (param_string_cst));
35773 return integer_zero_node;
35776 field = TYPE_FIELDS (__processor_model_type);
35777 field_val = arch_names_table[i].model;
35779 /* CPU types are stored in the next field. */
35780 if (field_val > M_CPU_TYPE_START
35781 && field_val < M_CPU_SUBTYPE_START)
35783 field = DECL_CHAIN (field);
35784 field_val -= M_CPU_TYPE_START;
35787 /* CPU subtypes are stored in the next field. */
35788 if (field_val > M_CPU_SUBTYPE_START)
35790 field = DECL_CHAIN ( DECL_CHAIN (field));
35791 field_val -= M_CPU_SUBTYPE_START;
35794 /* Get the appropriate field in __cpu_model. */
35795 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35796 field, NULL_TREE);
35798 /* Check the value. */
35799 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35800 build_int_cstu (unsigned_type_node, field_val));
35801 return build1 (CONVERT_EXPR, integer_type_node, final);
35803 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35805 tree ref;
35806 tree array_elt;
35807 tree field;
35808 tree final;
35810 unsigned int field_val = 0;
35811 unsigned int NUM_ISA_NAMES
35812 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35814 for (i = 0; i < NUM_ISA_NAMES; i++)
35815 if (strcmp (isa_names_table[i].name,
35816 TREE_STRING_POINTER (param_string_cst)) == 0)
35817 break;
35819 if (i == NUM_ISA_NAMES)
35821 error ("Parameter to builtin not valid: %s",
35822 TREE_STRING_POINTER (param_string_cst));
35823 return integer_zero_node;
35826 field = TYPE_FIELDS (__processor_model_type);
35827 /* Get the last field, which is __cpu_features. */
35828 while (DECL_CHAIN (field))
35829 field = DECL_CHAIN (field);
35831 /* Get the appropriate field: __cpu_model.__cpu_features */
35832 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35833 field, NULL_TREE);
35835 /* Access the 0th element of __cpu_features array. */
35836 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35837 integer_zero_node, NULL_TREE, NULL_TREE);
35839 field_val = (1 << isa_names_table[i].feature);
35840 /* Return __cpu_model.__cpu_features[0] & field_val */
35841 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35842 build_int_cstu (unsigned_type_node, field_val));
35843 return build1 (CONVERT_EXPR, integer_type_node, final);
35845 gcc_unreachable ();
35848 static tree
35849 ix86_fold_builtin (tree fndecl, int n_args,
35850 tree *args, bool ignore ATTRIBUTE_UNUSED)
35852 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35854 enum ix86_builtins fn_code = (enum ix86_builtins)
35855 DECL_FUNCTION_CODE (fndecl);
35856 if (fn_code == IX86_BUILTIN_CPU_IS
35857 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35859 gcc_assert (n_args == 1);
35860 return fold_builtin_cpu (fndecl, args);
35864 #ifdef SUBTARGET_FOLD_BUILTIN
35865 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35866 #endif
35868 return NULL_TREE;
35871 /* Make builtins to detect cpu type and features supported. NAME is
35872 the builtin name, CODE is the builtin code, and FTYPE is the function
35873 type of the builtin. */
35875 static void
35876 make_cpu_type_builtin (const char* name, int code,
35877 enum ix86_builtin_func_type ftype, bool is_const)
35879 tree decl;
35880 tree type;
35882 type = ix86_get_builtin_func_type (ftype);
35883 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35884 NULL, NULL_TREE);
35885 gcc_assert (decl != NULL_TREE);
35886 ix86_builtins[(int) code] = decl;
35887 TREE_READONLY (decl) = is_const;
35890 /* Make builtins to get CPU type and features supported. The created
35891 builtins are :
35893 __builtin_cpu_init (), to detect cpu type and features,
35894 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35895 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */
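/* Example usage (illustrative; the called paths are placeholders):

       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
         run_corei7_path ();
       else if (__builtin_cpu_supports ("sse4.2"))
         run_sse42_path ();  */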
35898 static void
35899 ix86_init_platform_type_builtins (void)
35901 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35902 INT_FTYPE_VOID, false);
35903 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35904 INT_FTYPE_PCCHAR, true);
35905 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35906 INT_FTYPE_PCCHAR, true);
35909 /* Internal method for ix86_init_builtins. */
35911 static void
35912 ix86_init_builtins_va_builtins_abi (void)
35914 tree ms_va_ref, sysv_va_ref;
35915 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35916 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35917 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35918 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35920 if (!TARGET_64BIT)
35921 return;
35922 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35923 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35924 ms_va_ref = build_reference_type (ms_va_list_type_node);
35925 sysv_va_ref =
35926 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35928 fnvoid_va_end_ms =
35929 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35930 fnvoid_va_start_ms =
35931 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35932 fnvoid_va_end_sysv =
35933 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35934 fnvoid_va_start_sysv =
35935 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35936 NULL_TREE);
35937 fnvoid_va_copy_ms =
35938 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35939 NULL_TREE);
35940 fnvoid_va_copy_sysv =
35941 build_function_type_list (void_type_node, sysv_va_ref,
35942 sysv_va_ref, NULL_TREE);
35944 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35945 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35946 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35947 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35948 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35949 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35950 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35951 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35952 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35953 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35954 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35955 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35958 static void
35959 ix86_init_builtin_types (void)
35961 tree float128_type_node, float80_type_node;
35963 /* The __float80 type. */
35964 float80_type_node = long_double_type_node;
35965 if (TYPE_MODE (float80_type_node) != XFmode)
35967 /* The __float80 type. */
35968 float80_type_node = make_node (REAL_TYPE);
35970 TYPE_PRECISION (float80_type_node) = 80;
35971 layout_type (float80_type_node);
35973 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35975 /* The __float128 type. */
35976 float128_type_node = make_node (REAL_TYPE);
35977 TYPE_PRECISION (float128_type_node) = 128;
35978 layout_type (float128_type_node);
35979 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35981 /* This macro is built by i386-builtin-types.awk. */
35982 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35985 static void
35986 ix86_init_builtins (void)
35988 tree t;
35990 ix86_init_builtin_types ();
35992 /* Builtins to get CPU type and features. */
35993 ix86_init_platform_type_builtins ();
35995 /* TFmode support builtins. */
35996 def_builtin_const (0, "__builtin_infq",
35997 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35998 def_builtin_const (0, "__builtin_huge_valq",
35999 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
36001 /* We will expand them to a normal call if SSE isn't available since
36002 they are used by libgcc. */
36003 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
36004 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
36005 BUILT_IN_MD, "__fabstf2", NULL_TREE);
36006 TREE_READONLY (t) = 1;
36007 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
36009 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
36010 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
36011 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
36012 TREE_READONLY (t) = 1;
36013 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
36015 ix86_init_tm_builtins ();
36016 ix86_init_mmx_sse_builtins ();
36017 ix86_init_mpx_builtins ();
36019 if (TARGET_LP64)
36020 ix86_init_builtins_va_builtins_abi ();
36022 #ifdef SUBTARGET_INIT_BUILTINS
36023 SUBTARGET_INIT_BUILTINS;
36024 #endif
36027 /* Return the ix86 builtin for CODE. */
36029 static tree
36030 ix86_builtin_decl (unsigned code, bool)
36032 if (code >= IX86_BUILTIN_MAX)
36033 return error_mark_node;
36035 return ix86_builtins[code];
36038 /* Errors in the source file can cause expand_expr to return const0_rtx
36039 where we expect a vector. To avoid crashing, use one of the vector
36040 clear instructions. */
36041 static rtx
36042 safe_vector_operand (rtx x, machine_mode mode)
36044 if (x == const0_rtx)
36045 x = CONST0_RTX (mode);
36046 return x;
36049 /* Fixup modeless constants to fit the required mode.  */
36050 static rtx
36051 fixup_modeless_constant (rtx x, machine_mode mode)
36053 if (GET_MODE (x) == VOIDmode)
36054 x = convert_to_mode (mode, x, 1);
36055 return x;
36058 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
36060 static rtx
36061 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
36063 rtx pat;
36064 tree arg0 = CALL_EXPR_ARG (exp, 0);
36065 tree arg1 = CALL_EXPR_ARG (exp, 1);
36066 rtx op0 = expand_normal (arg0);
36067 rtx op1 = expand_normal (arg1);
36068 machine_mode tmode = insn_data[icode].operand[0].mode;
36069 machine_mode mode0 = insn_data[icode].operand[1].mode;
36070 machine_mode mode1 = insn_data[icode].operand[2].mode;
36072 if (VECTOR_MODE_P (mode0))
36073 op0 = safe_vector_operand (op0, mode0);
36074 if (VECTOR_MODE_P (mode1))
36075 op1 = safe_vector_operand (op1, mode1);
36077 if (optimize || !target
36078 || GET_MODE (target) != tmode
36079 || !insn_data[icode].operand[0].predicate (target, tmode))
36080 target = gen_reg_rtx (tmode);
36082 if (GET_MODE (op1) == SImode && mode1 == TImode)
36084 rtx x = gen_reg_rtx (V4SImode);
36085 emit_insn (gen_sse2_loadd (x, op1));
36086 op1 = gen_lowpart (TImode, x);
36089 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36090 op0 = copy_to_mode_reg (mode0, op0);
36091 if (!insn_data[icode].operand[2].predicate (op1, mode1))
36092 op1 = copy_to_mode_reg (mode1, op1);
36094 pat = GEN_FCN (icode) (target, op0, op1);
36095 if (! pat)
36096 return 0;
36098 emit_insn (pat);
36100 return target;
36103 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
36105 static rtx
36106 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
36107 enum ix86_builtin_func_type m_type,
36108 enum rtx_code sub_code)
36110 rtx pat;
36111 int i;
36112 int nargs;
36113 bool comparison_p = false;
36114 bool tf_p = false;
36115 bool last_arg_constant = false;
36116 int num_memory = 0;
36117 struct {
36118 rtx op;
36119 machine_mode mode;
36120 } args[4];
36122 machine_mode tmode = insn_data[icode].operand[0].mode;
36124 switch (m_type)
36126 case MULTI_ARG_4_DF2_DI_I:
36127 case MULTI_ARG_4_DF2_DI_I1:
36128 case MULTI_ARG_4_SF2_SI_I:
36129 case MULTI_ARG_4_SF2_SI_I1:
36130 nargs = 4;
36131 last_arg_constant = true;
36132 break;
36134 case MULTI_ARG_3_SF:
36135 case MULTI_ARG_3_DF:
36136 case MULTI_ARG_3_SF2:
36137 case MULTI_ARG_3_DF2:
36138 case MULTI_ARG_3_DI:
36139 case MULTI_ARG_3_SI:
36140 case MULTI_ARG_3_SI_DI:
36141 case MULTI_ARG_3_HI:
36142 case MULTI_ARG_3_HI_SI:
36143 case MULTI_ARG_3_QI:
36144 case MULTI_ARG_3_DI2:
36145 case MULTI_ARG_3_SI2:
36146 case MULTI_ARG_3_HI2:
36147 case MULTI_ARG_3_QI2:
36148 nargs = 3;
36149 break;
36151 case MULTI_ARG_2_SF:
36152 case MULTI_ARG_2_DF:
36153 case MULTI_ARG_2_DI:
36154 case MULTI_ARG_2_SI:
36155 case MULTI_ARG_2_HI:
36156 case MULTI_ARG_2_QI:
36157 nargs = 2;
36158 break;
36160 case MULTI_ARG_2_DI_IMM:
36161 case MULTI_ARG_2_SI_IMM:
36162 case MULTI_ARG_2_HI_IMM:
36163 case MULTI_ARG_2_QI_IMM:
36164 nargs = 2;
36165 last_arg_constant = true;
36166 break;
36168 case MULTI_ARG_1_SF:
36169 case MULTI_ARG_1_DF:
36170 case MULTI_ARG_1_SF2:
36171 case MULTI_ARG_1_DF2:
36172 case MULTI_ARG_1_DI:
36173 case MULTI_ARG_1_SI:
36174 case MULTI_ARG_1_HI:
36175 case MULTI_ARG_1_QI:
36176 case MULTI_ARG_1_SI_DI:
36177 case MULTI_ARG_1_HI_DI:
36178 case MULTI_ARG_1_HI_SI:
36179 case MULTI_ARG_1_QI_DI:
36180 case MULTI_ARG_1_QI_SI:
36181 case MULTI_ARG_1_QI_HI:
36182 nargs = 1;
36183 break;
36185 case MULTI_ARG_2_DI_CMP:
36186 case MULTI_ARG_2_SI_CMP:
36187 case MULTI_ARG_2_HI_CMP:
36188 case MULTI_ARG_2_QI_CMP:
36189 nargs = 2;
36190 comparison_p = true;
36191 break;
36193 case MULTI_ARG_2_SF_TF:
36194 case MULTI_ARG_2_DF_TF:
36195 case MULTI_ARG_2_DI_TF:
36196 case MULTI_ARG_2_SI_TF:
36197 case MULTI_ARG_2_HI_TF:
36198 case MULTI_ARG_2_QI_TF:
36199 nargs = 2;
36200 tf_p = true;
36201 break;
36203 default:
36204 gcc_unreachable ();
36207 if (optimize || !target
36208 || GET_MODE (target) != tmode
36209 || !insn_data[icode].operand[0].predicate (target, tmode))
36210 target = gen_reg_rtx (tmode);
36212 gcc_assert (nargs <= 4);
36214 for (i = 0; i < nargs; i++)
36216 tree arg = CALL_EXPR_ARG (exp, i);
36217 rtx op = expand_normal (arg);
36218 int adjust = (comparison_p) ? 1 : 0;
36219 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36221 if (last_arg_constant && i == nargs - 1)
36223 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36225 enum insn_code new_icode = icode;
36226 switch (icode)
36228 case CODE_FOR_xop_vpermil2v2df3:
36229 case CODE_FOR_xop_vpermil2v4sf3:
36230 case CODE_FOR_xop_vpermil2v4df3:
36231 case CODE_FOR_xop_vpermil2v8sf3:
36232 error ("the last argument must be a 2-bit immediate");
36233 return gen_reg_rtx (tmode);
36234 case CODE_FOR_xop_rotlv2di3:
36235 new_icode = CODE_FOR_rotlv2di3;
36236 goto xop_rotl;
36237 case CODE_FOR_xop_rotlv4si3:
36238 new_icode = CODE_FOR_rotlv4si3;
36239 goto xop_rotl;
36240 case CODE_FOR_xop_rotlv8hi3:
36241 new_icode = CODE_FOR_rotlv8hi3;
36242 goto xop_rotl;
36243 case CODE_FOR_xop_rotlv16qi3:
36244 new_icode = CODE_FOR_rotlv16qi3;
36245 xop_rotl:
36246 if (CONST_INT_P (op))
36248 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
36249 op = GEN_INT (INTVAL (op) & mask);
36250 gcc_checking_assert
36251 (insn_data[icode].operand[i + 1].predicate (op, mode));
36253 else
36255 gcc_checking_assert
36256 (nargs == 2
36257 && insn_data[new_icode].operand[0].mode == tmode
36258 && insn_data[new_icode].operand[1].mode == tmode
36259 && insn_data[new_icode].operand[2].mode == mode
36260 && insn_data[new_icode].operand[0].predicate
36261 == insn_data[icode].operand[0].predicate
36262 && insn_data[new_icode].operand[1].predicate
36263 == insn_data[icode].operand[1].predicate);
36264 icode = new_icode;
36265 goto non_constant;
36267 break;
36268 default:
36269 gcc_unreachable ();
36273 else
36275 non_constant:
36276 if (VECTOR_MODE_P (mode))
36277 op = safe_vector_operand (op, mode);
36279 /* If we aren't optimizing, only allow one memory operand to be
36280 generated. */
36281 if (memory_operand (op, mode))
36282 num_memory++;
36284 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36286 if (optimize
36287 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36288 || num_memory > 1)
36289 op = force_reg (mode, op);
36292 args[i].op = op;
36293 args[i].mode = mode;
36296 switch (nargs)
36298 case 1:
36299 pat = GEN_FCN (icode) (target, args[0].op);
36300 break;
36302 case 2:
36303 if (tf_p)
36304 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36305 GEN_INT ((int)sub_code));
36306 else if (! comparison_p)
36307 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36308 else
36310 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36311 args[0].op,
36312 args[1].op);
36314 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36316 break;
36318 case 3:
36319 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36320 break;
36322 case 4:
36323 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36324 break;
36326 default:
36327 gcc_unreachable ();
36330 if (! pat)
36331 return 0;
36333 emit_insn (pat);
36334 return target;
36337 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36338 insns with vec_merge. */
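/* These handle builtins like the scalar rcp/rsqrt/sqrt forms: the insn
   computes element 0 and the vec_merge keeps the remaining elements of
   the source operand, which is why op0 is duplicated into op1 below. */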
36340 static rtx
36341 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36342 rtx target)
36344 rtx pat;
36345 tree arg0 = CALL_EXPR_ARG (exp, 0);
36346 rtx op1, op0 = expand_normal (arg0);
36347 machine_mode tmode = insn_data[icode].operand[0].mode;
36348 machine_mode mode0 = insn_data[icode].operand[1].mode;
36350 if (optimize || !target
36351 || GET_MODE (target) != tmode
36352 || !insn_data[icode].operand[0].predicate (target, tmode))
36353 target = gen_reg_rtx (tmode);
36355 if (VECTOR_MODE_P (mode0))
36356 op0 = safe_vector_operand (op0, mode0);
36358 if ((optimize && !register_operand (op0, mode0))
36359 || !insn_data[icode].operand[1].predicate (op0, mode0))
36360 op0 = copy_to_mode_reg (mode0, op0);
36362 op1 = op0;
36363 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36364 op1 = copy_to_mode_reg (mode0, op1);
36366 pat = GEN_FCN (icode) (target, op0, op1);
36367 if (! pat)
36368 return 0;
36369 emit_insn (pat);
36370 return target;
36373 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36375 static rtx
36376 ix86_expand_sse_compare (const struct builtin_description *d,
36377 tree exp, rtx target, bool swap)
36379 rtx pat;
36380 tree arg0 = CALL_EXPR_ARG (exp, 0);
36381 tree arg1 = CALL_EXPR_ARG (exp, 1);
36382 rtx op0 = expand_normal (arg0);
36383 rtx op1 = expand_normal (arg1);
36384 rtx op2;
36385 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36386 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36387 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36388 enum rtx_code comparison = d->comparison;
36390 if (VECTOR_MODE_P (mode0))
36391 op0 = safe_vector_operand (op0, mode0);
36392 if (VECTOR_MODE_P (mode1))
36393 op1 = safe_vector_operand (op1, mode1);
36395 /* Swap operands if we have a comparison that isn't available in
36396 hardware. */
36397 if (swap)
36398 std::swap (op0, op1);
36400 if (optimize || !target
36401 || GET_MODE (target) != tmode
36402 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36403 target = gen_reg_rtx (tmode);
36405 if ((optimize && !register_operand (op0, mode0))
36406 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36407 op0 = copy_to_mode_reg (mode0, op0);
36408 if ((optimize && !register_operand (op1, mode1))
36409 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36410 op1 = copy_to_mode_reg (mode1, op1);
36412 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36413 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36414 if (! pat)
36415 return 0;
36416 emit_insn (pat);
36417 return target;
36420 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36422 static rtx
36423 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36424 rtx target)
36426 rtx pat;
36427 tree arg0 = CALL_EXPR_ARG (exp, 0);
36428 tree arg1 = CALL_EXPR_ARG (exp, 1);
36429 rtx op0 = expand_normal (arg0);
36430 rtx op1 = expand_normal (arg1);
36431 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36432 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36433 enum rtx_code comparison = d->comparison;
36435 if (VECTOR_MODE_P (mode0))
36436 op0 = safe_vector_operand (op0, mode0);
36437 if (VECTOR_MODE_P (mode1))
36438 op1 = safe_vector_operand (op1, mode1);
36440 /* Swap operands if we have a comparison that isn't available in
36441 hardware. */
36442 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36443 std::swap (op0, op1);
36445 target = gen_reg_rtx (SImode);
36446 emit_move_insn (target, const0_rtx);
36447 target = gen_rtx_SUBREG (QImode, target, 0);
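/* Writing the comparison result through a QImode STRICT_LOW_PART of the
   zeroed SImode pseudo (below) leaves the upper bits zero, so the
   SUBREG_REG returned at the end is already a zero-extended 0/1 value. */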
36449 if ((optimize && !register_operand (op0, mode0))
36450 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36451 op0 = copy_to_mode_reg (mode0, op0);
36452 if ((optimize && !register_operand (op1, mode1))
36453 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36454 op1 = copy_to_mode_reg (mode1, op1);
36456 pat = GEN_FCN (d->icode) (op0, op1);
36457 if (! pat)
36458 return 0;
36459 emit_insn (pat);
36460 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36461 gen_rtx_fmt_ee (comparison, QImode,
36462 SET_DEST (pat),
36463 const0_rtx)));
36465 return SUBREG_REG (target);
36468 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36470 static rtx
36471 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36472 rtx target)
36474 rtx pat;
36475 tree arg0 = CALL_EXPR_ARG (exp, 0);
36476 rtx op1, op0 = expand_normal (arg0);
36477 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36478 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36480 if (optimize || target == 0
36481 || GET_MODE (target) != tmode
36482 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36483 target = gen_reg_rtx (tmode);
36485 if (VECTOR_MODE_P (mode0))
36486 op0 = safe_vector_operand (op0, mode0);
36488 if ((optimize && !register_operand (op0, mode0))
36489 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36490 op0 = copy_to_mode_reg (mode0, op0);
36492 op1 = GEN_INT (d->comparison);
36494 pat = GEN_FCN (d->icode) (target, op0, op1);
36495 if (! pat)
36496 return 0;
36497 emit_insn (pat);
36498 return target;
36501 static rtx
36502 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36503 tree exp, rtx target)
36505 rtx pat;
36506 tree arg0 = CALL_EXPR_ARG (exp, 0);
36507 tree arg1 = CALL_EXPR_ARG (exp, 1);
36508 rtx op0 = expand_normal (arg0);
36509 rtx op1 = expand_normal (arg1);
36510 rtx op2;
36511 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36512 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36513 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36515 if (optimize || target == 0
36516 || GET_MODE (target) != tmode
36517 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36518 target = gen_reg_rtx (tmode);
36520 op0 = safe_vector_operand (op0, mode0);
36521 op1 = safe_vector_operand (op1, mode1);
36523 if ((optimize && !register_operand (op0, mode0))
36524 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36525 op0 = copy_to_mode_reg (mode0, op0);
36526 if ((optimize && !register_operand (op1, mode1))
36527 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36528 op1 = copy_to_mode_reg (mode1, op1);
36530 op2 = GEN_INT (d->comparison);
36532 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36533 if (! pat)
36534 return 0;
36535 emit_insn (pat);
36536 return target;
36539 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36541 static rtx
36542 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36543 rtx target)
36545 rtx pat;
36546 tree arg0 = CALL_EXPR_ARG (exp, 0);
36547 tree arg1 = CALL_EXPR_ARG (exp, 1);
36548 rtx op0 = expand_normal (arg0);
36549 rtx op1 = expand_normal (arg1);
36550 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36551 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36552 enum rtx_code comparison = d->comparison;
36554 if (VECTOR_MODE_P (mode0))
36555 op0 = safe_vector_operand (op0, mode0);
36556 if (VECTOR_MODE_P (mode1))
36557 op1 = safe_vector_operand (op1, mode1);
36559 target = gen_reg_rtx (SImode);
36560 emit_move_insn (target, const0_rtx);
36561 target = gen_rtx_SUBREG (QImode, target, 0);
36563 if ((optimize && !register_operand (op0, mode0))
36564 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36565 op0 = copy_to_mode_reg (mode0, op0);
36566 if ((optimize && !register_operand (op1, mode1))
36567 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36568 op1 = copy_to_mode_reg (mode1, op1);
36570 pat = GEN_FCN (d->icode) (op0, op1);
36571 if (! pat)
36572 return 0;
36573 emit_insn (pat);
36574 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36575 gen_rtx_fmt_ee (comparison, QImode,
36576 SET_DEST (pat),
36577 const0_rtx)));
36579 return SUBREG_REG (target);
36582 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36584 static rtx
36585 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36586 tree exp, rtx target)
36588 rtx pat;
36589 tree arg0 = CALL_EXPR_ARG (exp, 0);
36590 tree arg1 = CALL_EXPR_ARG (exp, 1);
36591 tree arg2 = CALL_EXPR_ARG (exp, 2);
36592 tree arg3 = CALL_EXPR_ARG (exp, 3);
36593 tree arg4 = CALL_EXPR_ARG (exp, 4);
36594 rtx scratch0, scratch1;
36595 rtx op0 = expand_normal (arg0);
36596 rtx op1 = expand_normal (arg1);
36597 rtx op2 = expand_normal (arg2);
36598 rtx op3 = expand_normal (arg3);
36599 rtx op4 = expand_normal (arg4);
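/* For pcmpestr the operands pair up as: op0/op1 are the first source
   vector and its explicit length, op2/op3 the second vector and its
   length, and op4 is the imm8 control byte. */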
36600 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36602 tmode0 = insn_data[d->icode].operand[0].mode;
36603 tmode1 = insn_data[d->icode].operand[1].mode;
36604 modev2 = insn_data[d->icode].operand[2].mode;
36605 modei3 = insn_data[d->icode].operand[3].mode;
36606 modev4 = insn_data[d->icode].operand[4].mode;
36607 modei5 = insn_data[d->icode].operand[5].mode;
36608 modeimm = insn_data[d->icode].operand[6].mode;
36610 if (VECTOR_MODE_P (modev2))
36611 op0 = safe_vector_operand (op0, modev2);
36612 if (VECTOR_MODE_P (modev4))
36613 op2 = safe_vector_operand (op2, modev4);
36615 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36616 op0 = copy_to_mode_reg (modev2, op0);
36617 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36618 op1 = copy_to_mode_reg (modei3, op1);
36619 if ((optimize && !register_operand (op2, modev4))
36620 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36621 op2 = copy_to_mode_reg (modev4, op2);
36622 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36623 op3 = copy_to_mode_reg (modei5, op3);
36625 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36627 error ("the fifth argument must be an 8-bit immediate");
36628 return const0_rtx;
36631 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36633 if (optimize || !target
36634 || GET_MODE (target) != tmode0
36635 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36636 target = gen_reg_rtx (tmode0);
36638 scratch1 = gen_reg_rtx (tmode1);
36640 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36642 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36644 if (optimize || !target
36645 || GET_MODE (target) != tmode1
36646 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36647 target = gen_reg_rtx (tmode1);
36649 scratch0 = gen_reg_rtx (tmode0);
36651 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36653 else
36655 gcc_assert (d->flag);
36657 scratch0 = gen_reg_rtx (tmode0);
36658 scratch1 = gen_reg_rtx (tmode1);
36660 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36663 if (! pat)
36664 return 0;
36666 emit_insn (pat);
36668 if (d->flag)
36670 target = gen_reg_rtx (SImode);
36671 emit_move_insn (target, const0_rtx);
36672 target = gen_rtx_SUBREG (QImode, target, 0);
36674 emit_insn
36675 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36676 gen_rtx_fmt_ee (EQ, QImode,
36677 gen_rtx_REG ((machine_mode) d->flag,
36678 FLAGS_REG),
36679 const0_rtx)));
36680 return SUBREG_REG (target);
36682 else
36683 return target;
36687 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36689 static rtx
36690 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36691 tree exp, rtx target)
36693 rtx pat;
36694 tree arg0 = CALL_EXPR_ARG (exp, 0);
36695 tree arg1 = CALL_EXPR_ARG (exp, 1);
36696 tree arg2 = CALL_EXPR_ARG (exp, 2);
36697 rtx scratch0, scratch1;
36698 rtx op0 = expand_normal (arg0);
36699 rtx op1 = expand_normal (arg1);
36700 rtx op2 = expand_normal (arg2);
36701 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36703 tmode0 = insn_data[d->icode].operand[0].mode;
36704 tmode1 = insn_data[d->icode].operand[1].mode;
36705 modev2 = insn_data[d->icode].operand[2].mode;
36706 modev3 = insn_data[d->icode].operand[3].mode;
36707 modeimm = insn_data[d->icode].operand[4].mode;
36709 if (VECTOR_MODE_P (modev2))
36710 op0 = safe_vector_operand (op0, modev2);
36711 if (VECTOR_MODE_P (modev3))
36712 op1 = safe_vector_operand (op1, modev3);
36714 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36715 op0 = copy_to_mode_reg (modev2, op0);
36716 if ((optimize && !register_operand (op1, modev3))
36717 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36718 op1 = copy_to_mode_reg (modev3, op1);
36720 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36722 error ("the third argument must be an 8-bit immediate");
36723 return const0_rtx;
36726 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36728 if (optimize || !target
36729 || GET_MODE (target) != tmode0
36730 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36731 target = gen_reg_rtx (tmode0);
36733 scratch1 = gen_reg_rtx (tmode1);
36735 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36737 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36739 if (optimize || !target
36740 || GET_MODE (target) != tmode1
36741 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36742 target = gen_reg_rtx (tmode1);
36744 scratch0 = gen_reg_rtx (tmode0);
36746 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36748 else
36750 gcc_assert (d->flag);
36752 scratch0 = gen_reg_rtx (tmode0);
36753 scratch1 = gen_reg_rtx (tmode1);
36755 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36758 if (! pat)
36759 return 0;
36761 emit_insn (pat);
36763 if (d->flag)
36765 target = gen_reg_rtx (SImode);
36766 emit_move_insn (target, const0_rtx);
36767 target = gen_rtx_SUBREG (QImode, target, 0);
36769 emit_insn
36770 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36771 gen_rtx_fmt_ee (EQ, QImode,
36772 gen_rtx_REG ((machine_mode) d->flag,
36773 FLAGS_REG),
36774 const0_rtx)));
36775 return SUBREG_REG (target);
36777 else
36778 return target;
36781 /* Subroutine of ix86_expand_builtin to take care of insns with
36782 a variable number of operands. */
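/* d->flag holds the builtin's ix86_builtin_func_type; the switch below
   maps each type to its operand count (nargs), the number of trailing
   immediate operands (nargs_constant), and, for masked forms, how many
   trailing operands follow the immediate (mask_pos). */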
36784 static rtx
36785 ix86_expand_args_builtin (const struct builtin_description *d,
36786 tree exp, rtx target)
36788 rtx pat, real_target;
36789 unsigned int i, nargs;
36790 unsigned int nargs_constant = 0;
36791 unsigned int mask_pos = 0;
36792 int num_memory = 0;
36793 struct
36795 rtx op;
36796 machine_mode mode;
36797 } args[6];
36798 bool last_arg_count = false;
36799 enum insn_code icode = d->icode;
36800 const struct insn_data_d *insn_p = &insn_data[icode];
36801 machine_mode tmode = insn_p->operand[0].mode;
36802 machine_mode rmode = VOIDmode;
36803 bool swap = false;
36804 enum rtx_code comparison = d->comparison;
36806 switch ((enum ix86_builtin_func_type) d->flag)
36808 case V2DF_FTYPE_V2DF_ROUND:
36809 case V4DF_FTYPE_V4DF_ROUND:
36810 case V4SF_FTYPE_V4SF_ROUND:
36811 case V8SF_FTYPE_V8SF_ROUND:
36812 case V4SI_FTYPE_V4SF_ROUND:
36813 case V8SI_FTYPE_V8SF_ROUND:
36814 return ix86_expand_sse_round (d, exp, target);
36815 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36816 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36817 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36818 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36819 case INT_FTYPE_V8SF_V8SF_PTEST:
36820 case INT_FTYPE_V4DI_V4DI_PTEST:
36821 case INT_FTYPE_V4DF_V4DF_PTEST:
36822 case INT_FTYPE_V4SF_V4SF_PTEST:
36823 case INT_FTYPE_V2DI_V2DI_PTEST:
36824 case INT_FTYPE_V2DF_V2DF_PTEST:
36825 return ix86_expand_sse_ptest (d, exp, target);
36826 case FLOAT128_FTYPE_FLOAT128:
36827 case FLOAT_FTYPE_FLOAT:
36828 case INT_FTYPE_INT:
36829 case UINT64_FTYPE_INT:
36830 case UINT16_FTYPE_UINT16:
36831 case INT64_FTYPE_INT64:
36832 case INT64_FTYPE_V4SF:
36833 case INT64_FTYPE_V2DF:
36834 case INT_FTYPE_V16QI:
36835 case INT_FTYPE_V8QI:
36836 case INT_FTYPE_V8SF:
36837 case INT_FTYPE_V4DF:
36838 case INT_FTYPE_V4SF:
36839 case INT_FTYPE_V2DF:
36840 case INT_FTYPE_V32QI:
36841 case V16QI_FTYPE_V16QI:
36842 case V8SI_FTYPE_V8SF:
36843 case V8SI_FTYPE_V4SI:
36844 case V8HI_FTYPE_V8HI:
36845 case V8HI_FTYPE_V16QI:
36846 case V8QI_FTYPE_V8QI:
36847 case V8SF_FTYPE_V8SF:
36848 case V8SF_FTYPE_V8SI:
36849 case V8SF_FTYPE_V4SF:
36850 case V8SF_FTYPE_V8HI:
36851 case V4SI_FTYPE_V4SI:
36852 case V4SI_FTYPE_V16QI:
36853 case V4SI_FTYPE_V4SF:
36854 case V4SI_FTYPE_V8SI:
36855 case V4SI_FTYPE_V8HI:
36856 case V4SI_FTYPE_V4DF:
36857 case V4SI_FTYPE_V2DF:
36858 case V4HI_FTYPE_V4HI:
36859 case V4DF_FTYPE_V4DF:
36860 case V4DF_FTYPE_V4SI:
36861 case V4DF_FTYPE_V4SF:
36862 case V4DF_FTYPE_V2DF:
36863 case V4SF_FTYPE_V4SF:
36864 case V4SF_FTYPE_V4SI:
36865 case V4SF_FTYPE_V8SF:
36866 case V4SF_FTYPE_V4DF:
36867 case V4SF_FTYPE_V8HI:
36868 case V4SF_FTYPE_V2DF:
36869 case V2DI_FTYPE_V2DI:
36870 case V2DI_FTYPE_V16QI:
36871 case V2DI_FTYPE_V8HI:
36872 case V2DI_FTYPE_V4SI:
36873 case V2DF_FTYPE_V2DF:
36874 case V2DF_FTYPE_V4SI:
36875 case V2DF_FTYPE_V4DF:
36876 case V2DF_FTYPE_V4SF:
36877 case V2DF_FTYPE_V2SI:
36878 case V2SI_FTYPE_V2SI:
36879 case V2SI_FTYPE_V4SF:
36880 case V2SI_FTYPE_V2SF:
36881 case V2SI_FTYPE_V2DF:
36882 case V2SF_FTYPE_V2SF:
36883 case V2SF_FTYPE_V2SI:
36884 case V32QI_FTYPE_V32QI:
36885 case V32QI_FTYPE_V16QI:
36886 case V16HI_FTYPE_V16HI:
36887 case V16HI_FTYPE_V8HI:
36888 case V8SI_FTYPE_V8SI:
36889 case V16HI_FTYPE_V16QI:
36890 case V8SI_FTYPE_V16QI:
36891 case V4DI_FTYPE_V16QI:
36892 case V8SI_FTYPE_V8HI:
36893 case V4DI_FTYPE_V8HI:
36894 case V4DI_FTYPE_V4SI:
36895 case V4DI_FTYPE_V2DI:
36896 case UHI_FTYPE_UHI:
36897 case UHI_FTYPE_V16QI:
36898 case USI_FTYPE_V32QI:
36899 case UDI_FTYPE_V64QI:
36900 case V16QI_FTYPE_UHI:
36901 case V32QI_FTYPE_USI:
36902 case V64QI_FTYPE_UDI:
36903 case V8HI_FTYPE_UQI:
36904 case V16HI_FTYPE_UHI:
36905 case V32HI_FTYPE_USI:
36906 case V4SI_FTYPE_UQI:
36907 case V8SI_FTYPE_UQI:
36908 case V4SI_FTYPE_UHI:
36909 case V8SI_FTYPE_UHI:
36910 case UQI_FTYPE_V8HI:
36911 case UHI_FTYPE_V16HI:
36912 case USI_FTYPE_V32HI:
36913 case UQI_FTYPE_V4SI:
36914 case UQI_FTYPE_V8SI:
36915 case UHI_FTYPE_V16SI:
36916 case UQI_FTYPE_V2DI:
36917 case UQI_FTYPE_V4DI:
36918 case UQI_FTYPE_V8DI:
36919 case V16SI_FTYPE_UHI:
36920 case V2DI_FTYPE_UQI:
36921 case V4DI_FTYPE_UQI:
36922 case V16SI_FTYPE_INT:
36923 case V16SF_FTYPE_V8SF:
36924 case V16SI_FTYPE_V8SI:
36925 case V16SF_FTYPE_V4SF:
36926 case V16SI_FTYPE_V4SI:
36927 case V16SF_FTYPE_V16SF:
36928 case V8DI_FTYPE_UQI:
36929 case V8DF_FTYPE_V4DF:
36930 case V8DF_FTYPE_V2DF:
36931 case V8DF_FTYPE_V8DF:
36932 nargs = 1;
36933 break;
36934 case V4SF_FTYPE_V4SF_VEC_MERGE:
36935 case V2DF_FTYPE_V2DF_VEC_MERGE:
36936 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36937 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36938 case V16QI_FTYPE_V16QI_V16QI:
36939 case V16QI_FTYPE_V8HI_V8HI:
36940 case V16SF_FTYPE_V16SF_V16SF:
36941 case V8QI_FTYPE_V8QI_V8QI:
36942 case V8QI_FTYPE_V4HI_V4HI:
36943 case V8HI_FTYPE_V8HI_V8HI:
36944 case V8HI_FTYPE_V16QI_V16QI:
36945 case V8HI_FTYPE_V4SI_V4SI:
36946 case V8SF_FTYPE_V8SF_V8SF:
36947 case V8SF_FTYPE_V8SF_V8SI:
36948 case V8DF_FTYPE_V8DF_V8DF:
36949 case V4SI_FTYPE_V4SI_V4SI:
36950 case V4SI_FTYPE_V8HI_V8HI:
36951 case V4SI_FTYPE_V2DF_V2DF:
36952 case V4HI_FTYPE_V4HI_V4HI:
36953 case V4HI_FTYPE_V8QI_V8QI:
36954 case V4HI_FTYPE_V2SI_V2SI:
36955 case V4DF_FTYPE_V4DF_V4DF:
36956 case V4DF_FTYPE_V4DF_V4DI:
36957 case V4SF_FTYPE_V4SF_V4SF:
36958 case V4SF_FTYPE_V4SF_V4SI:
36959 case V4SF_FTYPE_V4SF_V2SI:
36960 case V4SF_FTYPE_V4SF_V2DF:
36961 case V4SF_FTYPE_V4SF_UINT:
36962 case V4SF_FTYPE_V4SF_DI:
36963 case V4SF_FTYPE_V4SF_SI:
36964 case V2DI_FTYPE_V2DI_V2DI:
36965 case V2DI_FTYPE_V16QI_V16QI:
36966 case V2DI_FTYPE_V4SI_V4SI:
36967 case V2DI_FTYPE_V2DI_V16QI:
36968 case V2SI_FTYPE_V2SI_V2SI:
36969 case V2SI_FTYPE_V4HI_V4HI:
36970 case V2SI_FTYPE_V2SF_V2SF:
36971 case V2DF_FTYPE_V2DF_V2DF:
36972 case V2DF_FTYPE_V2DF_V4SF:
36973 case V2DF_FTYPE_V2DF_V2DI:
36974 case V2DF_FTYPE_V2DF_DI:
36975 case V2DF_FTYPE_V2DF_SI:
36976 case V2DF_FTYPE_V2DF_UINT:
36977 case V2SF_FTYPE_V2SF_V2SF:
36978 case V1DI_FTYPE_V1DI_V1DI:
36979 case V1DI_FTYPE_V8QI_V8QI:
36980 case V1DI_FTYPE_V2SI_V2SI:
36981 case V32QI_FTYPE_V16HI_V16HI:
36982 case V16HI_FTYPE_V8SI_V8SI:
36983 case V32QI_FTYPE_V32QI_V32QI:
36984 case V16HI_FTYPE_V32QI_V32QI:
36985 case V16HI_FTYPE_V16HI_V16HI:
36986 case V8SI_FTYPE_V4DF_V4DF:
36987 case V8SI_FTYPE_V8SI_V8SI:
36988 case V8SI_FTYPE_V16HI_V16HI:
36989 case V4DI_FTYPE_V4DI_V4DI:
36990 case V4DI_FTYPE_V8SI_V8SI:
36991 case V8DI_FTYPE_V64QI_V64QI:
36992 if (comparison == UNKNOWN)
36993 return ix86_expand_binop_builtin (icode, exp, target);
36994 nargs = 2;
36995 break;
36996 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36997 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36998 gcc_assert (comparison != UNKNOWN);
36999 nargs = 2;
37000 swap = true;
37001 break;
37002 case V16HI_FTYPE_V16HI_V8HI_COUNT:
37003 case V16HI_FTYPE_V16HI_SI_COUNT:
37004 case V8SI_FTYPE_V8SI_V4SI_COUNT:
37005 case V8SI_FTYPE_V8SI_SI_COUNT:
37006 case V4DI_FTYPE_V4DI_V2DI_COUNT:
37007 case V4DI_FTYPE_V4DI_INT_COUNT:
37008 case V8HI_FTYPE_V8HI_V8HI_COUNT:
37009 case V8HI_FTYPE_V8HI_SI_COUNT:
37010 case V4SI_FTYPE_V4SI_V4SI_COUNT:
37011 case V4SI_FTYPE_V4SI_SI_COUNT:
37012 case V4HI_FTYPE_V4HI_V4HI_COUNT:
37013 case V4HI_FTYPE_V4HI_SI_COUNT:
37014 case V2DI_FTYPE_V2DI_V2DI_COUNT:
37015 case V2DI_FTYPE_V2DI_SI_COUNT:
37016 case V2SI_FTYPE_V2SI_V2SI_COUNT:
37017 case V2SI_FTYPE_V2SI_SI_COUNT:
37018 case V1DI_FTYPE_V1DI_V1DI_COUNT:
37019 case V1DI_FTYPE_V1DI_SI_COUNT:
37020 nargs = 2;
37021 last_arg_count = true;
37022 break;
37023 case UINT64_FTYPE_UINT64_UINT64:
37024 case UINT_FTYPE_UINT_UINT:
37025 case UINT_FTYPE_UINT_USHORT:
37026 case UINT_FTYPE_UINT_UCHAR:
37027 case UINT16_FTYPE_UINT16_INT:
37028 case UINT8_FTYPE_UINT8_INT:
37029 case UHI_FTYPE_UHI_UHI:
37030 case USI_FTYPE_USI_USI:
37031 case UDI_FTYPE_UDI_UDI:
37032 case V16SI_FTYPE_V8DF_V8DF:
37033 nargs = 2;
37034 break;
37035 case V2DI_FTYPE_V2DI_INT_CONVERT:
37036 nargs = 2;
37037 rmode = V1TImode;
37038 nargs_constant = 1;
37039 break;
37040 case V4DI_FTYPE_V4DI_INT_CONVERT:
37041 nargs = 2;
37042 rmode = V2TImode;
37043 nargs_constant = 1;
37044 break;
37045 case V8DI_FTYPE_V8DI_INT_CONVERT:
37046 nargs = 2;
37047 rmode = V4TImode;
37048 nargs_constant = 1;
37049 break;
37050 case V8HI_FTYPE_V8HI_INT:
37051 case V8HI_FTYPE_V8SF_INT:
37052 case V16HI_FTYPE_V16SF_INT:
37053 case V8HI_FTYPE_V4SF_INT:
37054 case V8SF_FTYPE_V8SF_INT:
37055 case V4SF_FTYPE_V16SF_INT:
37056 case V16SF_FTYPE_V16SF_INT:
37057 case V4SI_FTYPE_V4SI_INT:
37058 case V4SI_FTYPE_V8SI_INT:
37059 case V4HI_FTYPE_V4HI_INT:
37060 case V4DF_FTYPE_V4DF_INT:
37061 case V4DF_FTYPE_V8DF_INT:
37062 case V4SF_FTYPE_V4SF_INT:
37063 case V4SF_FTYPE_V8SF_INT:
37064 case V2DI_FTYPE_V2DI_INT:
37065 case V2DF_FTYPE_V2DF_INT:
37066 case V2DF_FTYPE_V4DF_INT:
37067 case V16HI_FTYPE_V16HI_INT:
37068 case V8SI_FTYPE_V8SI_INT:
37069 case V16SI_FTYPE_V16SI_INT:
37070 case V4SI_FTYPE_V16SI_INT:
37071 case V4DI_FTYPE_V4DI_INT:
37072 case V2DI_FTYPE_V4DI_INT:
37073 case V4DI_FTYPE_V8DI_INT:
37074 case QI_FTYPE_V4SF_INT:
37075 case QI_FTYPE_V2DF_INT:
37076 nargs = 2;
37077 nargs_constant = 1;
37078 break;
37079 case V16QI_FTYPE_V16QI_V16QI_V16QI:
37080 case V8SF_FTYPE_V8SF_V8SF_V8SF:
37081 case V4DF_FTYPE_V4DF_V4DF_V4DF:
37082 case V4SF_FTYPE_V4SF_V4SF_V4SF:
37083 case V2DF_FTYPE_V2DF_V2DF_V2DF:
37084 case V32QI_FTYPE_V32QI_V32QI_V32QI:
37085 case UHI_FTYPE_V16SI_V16SI_UHI:
37086 case UQI_FTYPE_V8DI_V8DI_UQI:
37087 case V16HI_FTYPE_V16SI_V16HI_UHI:
37088 case V16QI_FTYPE_V16SI_V16QI_UHI:
37089 case V16QI_FTYPE_V8DI_V16QI_UQI:
37090 case V16SF_FTYPE_V16SF_V16SF_UHI:
37091 case V16SF_FTYPE_V4SF_V16SF_UHI:
37092 case V16SI_FTYPE_SI_V16SI_UHI:
37093 case V16SI_FTYPE_V16HI_V16SI_UHI:
37094 case V16SI_FTYPE_V16QI_V16SI_UHI:
37095 case V8SF_FTYPE_V4SF_V8SF_UQI:
37096 case V4DF_FTYPE_V2DF_V4DF_UQI:
37097 case V8SI_FTYPE_V4SI_V8SI_UQI:
37098 case V8SI_FTYPE_SI_V8SI_UQI:
37099 case V4SI_FTYPE_V4SI_V4SI_UQI:
37100 case V4SI_FTYPE_SI_V4SI_UQI:
37101 case V4DI_FTYPE_V2DI_V4DI_UQI:
37102 case V4DI_FTYPE_DI_V4DI_UQI:
37103 case V2DI_FTYPE_V2DI_V2DI_UQI:
37104 case V2DI_FTYPE_DI_V2DI_UQI:
37105 case V64QI_FTYPE_V64QI_V64QI_UDI:
37106 case V64QI_FTYPE_V16QI_V64QI_UDI:
37107 case V64QI_FTYPE_QI_V64QI_UDI:
37108 case V32QI_FTYPE_V32QI_V32QI_USI:
37109 case V32QI_FTYPE_V16QI_V32QI_USI:
37110 case V32QI_FTYPE_QI_V32QI_USI:
37111 case V16QI_FTYPE_V16QI_V16QI_UHI:
37112 case V16QI_FTYPE_QI_V16QI_UHI:
37113 case V32HI_FTYPE_V8HI_V32HI_USI:
37114 case V32HI_FTYPE_HI_V32HI_USI:
37115 case V16HI_FTYPE_V8HI_V16HI_UHI:
37116 case V16HI_FTYPE_HI_V16HI_UHI:
37117 case V8HI_FTYPE_V8HI_V8HI_UQI:
37118 case V8HI_FTYPE_HI_V8HI_UQI:
37119 case V8SF_FTYPE_V8HI_V8SF_UQI:
37120 case V4SF_FTYPE_V8HI_V4SF_UQI:
37121 case V8SI_FTYPE_V8SF_V8SI_UQI:
37122 case V4SI_FTYPE_V4SF_V4SI_UQI:
37123 case V4DI_FTYPE_V4SF_V4DI_UQI:
37124 case V2DI_FTYPE_V4SF_V2DI_UQI:
37125 case V4SF_FTYPE_V4DI_V4SF_UQI:
37126 case V4SF_FTYPE_V2DI_V4SF_UQI:
37127 case V4DF_FTYPE_V4DI_V4DF_UQI:
37128 case V2DF_FTYPE_V2DI_V2DF_UQI:
37129 case V16QI_FTYPE_V8HI_V16QI_UQI:
37130 case V16QI_FTYPE_V16HI_V16QI_UHI:
37131 case V16QI_FTYPE_V4SI_V16QI_UQI:
37132 case V16QI_FTYPE_V8SI_V16QI_UQI:
37133 case V8HI_FTYPE_V4SI_V8HI_UQI:
37134 case V8HI_FTYPE_V8SI_V8HI_UQI:
37135 case V16QI_FTYPE_V2DI_V16QI_UQI:
37136 case V16QI_FTYPE_V4DI_V16QI_UQI:
37137 case V8HI_FTYPE_V2DI_V8HI_UQI:
37138 case V8HI_FTYPE_V4DI_V8HI_UQI:
37139 case V4SI_FTYPE_V2DI_V4SI_UQI:
37140 case V4SI_FTYPE_V4DI_V4SI_UQI:
37141 case V32QI_FTYPE_V32HI_V32QI_USI:
37142 case UHI_FTYPE_V16QI_V16QI_UHI:
37143 case USI_FTYPE_V32QI_V32QI_USI:
37144 case UDI_FTYPE_V64QI_V64QI_UDI:
37145 case UQI_FTYPE_V8HI_V8HI_UQI:
37146 case UHI_FTYPE_V16HI_V16HI_UHI:
37147 case USI_FTYPE_V32HI_V32HI_USI:
37148 case UQI_FTYPE_V4SI_V4SI_UQI:
37149 case UQI_FTYPE_V8SI_V8SI_UQI:
37150 case UQI_FTYPE_V2DI_V2DI_UQI:
37151 case UQI_FTYPE_V4DI_V4DI_UQI:
37152 case V4SF_FTYPE_V2DF_V4SF_UQI:
37153 case V4SF_FTYPE_V4DF_V4SF_UQI:
37154 case V16SI_FTYPE_V16SI_V16SI_UHI:
37155 case V16SI_FTYPE_V4SI_V16SI_UHI:
37156 case V2DI_FTYPE_V4SI_V2DI_UQI:
37157 case V2DI_FTYPE_V8HI_V2DI_UQI:
37158 case V2DI_FTYPE_V16QI_V2DI_UQI:
37159 case V4DI_FTYPE_V4DI_V4DI_UQI:
37160 case V4DI_FTYPE_V4SI_V4DI_UQI:
37161 case V4DI_FTYPE_V8HI_V4DI_UQI:
37162 case V4DI_FTYPE_V16QI_V4DI_UQI:
37163 case V4DI_FTYPE_V4DF_V4DI_UQI:
37164 case V2DI_FTYPE_V2DF_V2DI_UQI:
37165 case V4SI_FTYPE_V4DF_V4SI_UQI:
37166 case V4SI_FTYPE_V2DF_V4SI_UQI:
37167 case V4SI_FTYPE_V8HI_V4SI_UQI:
37168 case V4SI_FTYPE_V16QI_V4SI_UQI:
37169 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37170 case V8DF_FTYPE_V2DF_V8DF_UQI:
37171 case V8DF_FTYPE_V4DF_V8DF_UQI:
37172 case V8DF_FTYPE_V8DF_V8DF_UQI:
37173 case V8SF_FTYPE_V8SF_V8SF_UQI:
37174 case V8SF_FTYPE_V8SI_V8SF_UQI:
37175 case V4DF_FTYPE_V4DF_V4DF_UQI:
37176 case V4SF_FTYPE_V4SF_V4SF_UQI:
37177 case V2DF_FTYPE_V2DF_V2DF_UQI:
37178 case V2DF_FTYPE_V4SF_V2DF_UQI:
37179 case V2DF_FTYPE_V4SI_V2DF_UQI:
37180 case V4SF_FTYPE_V4SI_V4SF_UQI:
37181 case V4DF_FTYPE_V4SF_V4DF_UQI:
37182 case V4DF_FTYPE_V4SI_V4DF_UQI:
37183 case V8SI_FTYPE_V8SI_V8SI_UQI:
37184 case V8SI_FTYPE_V8HI_V8SI_UQI:
37185 case V8SI_FTYPE_V16QI_V8SI_UQI:
37186 case V8DF_FTYPE_V8SI_V8DF_UQI:
37187 case V8DI_FTYPE_DI_V8DI_UQI:
37188 case V16SF_FTYPE_V8SF_V16SF_UHI:
37189 case V16SI_FTYPE_V8SI_V16SI_UHI:
37190 case V16HI_FTYPE_V16HI_V16HI_UHI:
37191 case V8HI_FTYPE_V16QI_V8HI_UQI:
37192 case V16HI_FTYPE_V16QI_V16HI_UHI:
37193 case V32HI_FTYPE_V32HI_V32HI_USI:
37194 case V32HI_FTYPE_V32QI_V32HI_USI:
37195 case V8DI_FTYPE_V16QI_V8DI_UQI:
37196 case V8DI_FTYPE_V2DI_V8DI_UQI:
37197 case V8DI_FTYPE_V4DI_V8DI_UQI:
37198 case V8DI_FTYPE_V8DI_V8DI_UQI:
37199 case V8DI_FTYPE_V8HI_V8DI_UQI:
37200 case V8DI_FTYPE_V8SI_V8DI_UQI:
37201 case V8HI_FTYPE_V8DI_V8HI_UQI:
37202 case V8SI_FTYPE_V8DI_V8SI_UQI:
37203 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37204 nargs = 3;
37205 break;
37206 case V32QI_FTYPE_V32QI_V32QI_INT:
37207 case V16HI_FTYPE_V16HI_V16HI_INT:
37208 case V16QI_FTYPE_V16QI_V16QI_INT:
37209 case V4DI_FTYPE_V4DI_V4DI_INT:
37210 case V8HI_FTYPE_V8HI_V8HI_INT:
37211 case V8SI_FTYPE_V8SI_V8SI_INT:
37212 case V8SI_FTYPE_V8SI_V4SI_INT:
37213 case V8SF_FTYPE_V8SF_V8SF_INT:
37214 case V8SF_FTYPE_V8SF_V4SF_INT:
37215 case V4SI_FTYPE_V4SI_V4SI_INT:
37216 case V4DF_FTYPE_V4DF_V4DF_INT:
37217 case V16SF_FTYPE_V16SF_V16SF_INT:
37218 case V16SF_FTYPE_V16SF_V4SF_INT:
37219 case V16SI_FTYPE_V16SI_V4SI_INT:
37220 case V4DF_FTYPE_V4DF_V2DF_INT:
37221 case V4SF_FTYPE_V4SF_V4SF_INT:
37222 case V2DI_FTYPE_V2DI_V2DI_INT:
37223 case V4DI_FTYPE_V4DI_V2DI_INT:
37224 case V2DF_FTYPE_V2DF_V2DF_INT:
37225 case UQI_FTYPE_V8DI_V8UDI_INT:
37226 case UQI_FTYPE_V8DF_V8DF_INT:
37227 case UQI_FTYPE_V2DF_V2DF_INT:
37228 case UQI_FTYPE_V4SF_V4SF_INT:
37229 case UHI_FTYPE_V16SI_V16SI_INT:
37230 case UHI_FTYPE_V16SF_V16SF_INT:
37231 nargs = 3;
37232 nargs_constant = 1;
37233 break;
37234 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37235 nargs = 3;
37236 rmode = V4DImode;
37237 nargs_constant = 1;
37238 break;
37239 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37240 nargs = 3;
37241 rmode = V2DImode;
37242 nargs_constant = 1;
37243 break;
37244 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37245 nargs = 3;
37246 rmode = DImode;
37247 nargs_constant = 1;
37248 break;
37249 case V2DI_FTYPE_V2DI_UINT_UINT:
37250 nargs = 3;
37251 nargs_constant = 2;
37252 break;
37253 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37254 nargs = 3;
37255 rmode = V8DImode;
37256 nargs_constant = 1;
37257 break;
37258 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
37259 nargs = 5;
37260 rmode = V8DImode;
37261 mask_pos = 2;
37262 nargs_constant = 1;
37263 break;
37264 case QI_FTYPE_V8DF_INT_UQI:
37265 case QI_FTYPE_V4DF_INT_UQI:
37266 case QI_FTYPE_V2DF_INT_UQI:
37267 case HI_FTYPE_V16SF_INT_UHI:
37268 case QI_FTYPE_V8SF_INT_UQI:
37269 case QI_FTYPE_V4SF_INT_UQI:
37270 nargs = 3;
37271 mask_pos = 1;
37272 nargs_constant = 1;
37273 break;
37274 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
37275 nargs = 5;
37276 rmode = V4DImode;
37277 mask_pos = 2;
37278 nargs_constant = 1;
37279 break;
37280 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
37281 nargs = 5;
37282 rmode = V2DImode;
37283 mask_pos = 2;
37284 nargs_constant = 1;
37285 break;
37286 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
37287 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
37288 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
37289 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
37290 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
37291 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
37292 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
37293 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
37294 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
37295 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
37296 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
37297 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
37298 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
37299 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
37300 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
37301 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
37302 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
37303 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
37304 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
37305 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
37306 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
37307 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
37308 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
37309 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
37310 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
37311 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
37312 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
37313 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
37314 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
37315 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
37316 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
37317 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
37318 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
37319 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
37320 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
37321 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
37322 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
37323 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
37324 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
37325 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
37326 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
37327 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
37328 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
37329 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
37330 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
37331 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
37332 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
37333 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
37334 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
37335 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
37336 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
37337 nargs = 4;
37338 break;
37339 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37340 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37341 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37342 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37343 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37344 nargs = 4;
37345 nargs_constant = 1;
37346 break;
37347 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
37348 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
37349 case QI_FTYPE_V4DF_V4DF_INT_UQI:
37350 case QI_FTYPE_V8SF_V8SF_INT_UQI:
37351 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
37352 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
37353 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
37354 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
37355 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
37356 case USI_FTYPE_V32QI_V32QI_INT_USI:
37357 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
37358 case USI_FTYPE_V32HI_V32HI_INT_USI:
37359 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
37360 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
37361 nargs = 4;
37362 mask_pos = 1;
37363 nargs_constant = 1;
37364 break;
37365 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37366 nargs = 4;
37367 nargs_constant = 2;
37368 break;
37369 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37370 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37371 nargs = 4;
37372 break;
37373 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
37374 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
37375 mask_pos = 1;
37376 nargs = 4;
37377 nargs_constant = 1;
37378 break;
37379 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
37380 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
37381 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
37382 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
37383 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
37384 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
37385 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
37386 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
37387 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
37388 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
37389 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
37390 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
37391 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
37392 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
37393 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
37394 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
37395 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
37396 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
37397 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
37398 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
37399 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
37400 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
37401 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
37402 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
37403 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
37404 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
37405 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
37406 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
37407 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
37408 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
37409 nargs = 4;
37410 mask_pos = 2;
37411 nargs_constant = 1;
37412 break;
37413 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
37414 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
37415 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
37416 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
37417 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
37418 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
37419 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
37420 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
37421 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
37422 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
37423 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
37424 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
37425 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
37426 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
37427 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
37428 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
37429 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
37430 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
37431 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
37432 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
37433 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
37434 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
37435 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
37436 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
37437 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
37438 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
37439 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
37440 nargs = 5;
37441 mask_pos = 2;
37442 nargs_constant = 1;
37443 break;
37444 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
37445 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
37446 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
37447 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
37448 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
37449 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
37450 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
37451 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
37452 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
37453 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
37454 nargs = 5;
37456 mask_pos = 1;
37457 nargs_constant = 1;
37458 break;
37460 default:
37461 gcc_unreachable ();
37464 gcc_assert (nargs <= ARRAY_SIZE (args));
37466 if (comparison != UNKNOWN)
37468 gcc_assert (nargs == 2);
37469 return ix86_expand_sse_compare (d, exp, target, swap);
37472 if (rmode == VOIDmode || rmode == tmode)
37474 if (optimize
37475 || target == 0
37476 || GET_MODE (target) != tmode
37477 || !insn_p->operand[0].predicate (target, tmode))
37478 target = gen_reg_rtx (tmode);
37479 real_target = target;
37481 else
37483 real_target = gen_reg_rtx (tmode);
37484 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
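/* Here the insn computes its result in tmode while the builtin is
   declared to return rmode, so the insn is emitted into a fresh tmode
   pseudo and the rmode subreg of it is what the caller sees. */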
37487 for (i = 0; i < nargs; i++)
37489 tree arg = CALL_EXPR_ARG (exp, i);
37490 rtx op = expand_normal (arg);
37491 machine_mode mode = insn_p->operand[i + 1].mode;
37492 bool match = insn_p->operand[i + 1].predicate (op, mode);
37494 if (last_arg_count && (i + 1) == nargs)
37496 /* SIMD shift insns take either an 8-bit immediate or a
37497 register as the count. But the builtin functions take an int
37498 as the count. If the count doesn't match, we put it in a register. */
37499 if (!match)
37501 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37502 if (!insn_p->operand[i + 1].predicate (op, mode))
37503 op = copy_to_reg (op);
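/* Next, the immediate-position check: with a trailing mask (mask_pos
   nonzero) the constant operand is followed by mask_pos merge/mask
   operands; without one, the last nargs_constant operands must be
   immediates. */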
37506 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37507 || (!mask_pos && (nargs - i) <= nargs_constant))
37509 if (!match)
37510 switch (icode)
37512 case CODE_FOR_avx_vinsertf128v4di:
37513 case CODE_FOR_avx_vextractf128v4di:
37514 error ("the last argument must be an 1-bit immediate");
37515 return const0_rtx;
37517 case CODE_FOR_avx512f_cmpv8di3_mask:
37518 case CODE_FOR_avx512f_cmpv16si3_mask:
37519 case CODE_FOR_avx512f_ucmpv8di3_mask:
37520 case CODE_FOR_avx512f_ucmpv16si3_mask:
37521 case CODE_FOR_avx512vl_cmpv4di3_mask:
37522 case CODE_FOR_avx512vl_cmpv8si3_mask:
37523 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37524 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37525 case CODE_FOR_avx512vl_cmpv2di3_mask:
37526 case CODE_FOR_avx512vl_cmpv4si3_mask:
37527 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37528 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37529 error ("the last argument must be a 3-bit immediate");
37530 return const0_rtx;
37532 case CODE_FOR_sse4_1_roundsd:
37533 case CODE_FOR_sse4_1_roundss:
37535 case CODE_FOR_sse4_1_roundpd:
37536 case CODE_FOR_sse4_1_roundps:
37537 case CODE_FOR_avx_roundpd256:
37538 case CODE_FOR_avx_roundps256:
37540 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37541 case CODE_FOR_sse4_1_roundps_sfix:
37542 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37543 case CODE_FOR_avx_roundps_sfix256:
37545 case CODE_FOR_sse4_1_blendps:
37546 case CODE_FOR_avx_blendpd256:
37547 case CODE_FOR_avx_vpermilv4df:
37548 case CODE_FOR_avx_vpermilv4df_mask:
37549 case CODE_FOR_avx512f_getmantv8df_mask:
37550 case CODE_FOR_avx512f_getmantv16sf_mask:
37551 case CODE_FOR_avx512vl_getmantv8sf_mask:
37552 case CODE_FOR_avx512vl_getmantv4df_mask:
37553 case CODE_FOR_avx512vl_getmantv4sf_mask:
37554 case CODE_FOR_avx512vl_getmantv2df_mask:
37555 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37556 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37557 case CODE_FOR_avx512dq_rangepv4df_mask:
37558 case CODE_FOR_avx512dq_rangepv8sf_mask:
37559 case CODE_FOR_avx512dq_rangepv2df_mask:
37560 case CODE_FOR_avx512dq_rangepv4sf_mask:
37561 case CODE_FOR_avx_shufpd256_mask:
37562 error ("the last argument must be a 4-bit immediate");
37563 return const0_rtx;
37565 case CODE_FOR_sha1rnds4:
37566 case CODE_FOR_sse4_1_blendpd:
37567 case CODE_FOR_avx_vpermilv2df:
37568 case CODE_FOR_avx_vpermilv2df_mask:
37569 case CODE_FOR_xop_vpermil2v2df3:
37570 case CODE_FOR_xop_vpermil2v4sf3:
37571 case CODE_FOR_xop_vpermil2v4df3:
37572 case CODE_FOR_xop_vpermil2v8sf3:
37573 case CODE_FOR_avx512f_vinsertf32x4_mask:
37574 case CODE_FOR_avx512f_vinserti32x4_mask:
37575 case CODE_FOR_avx512f_vextractf32x4_mask:
37576 case CODE_FOR_avx512f_vextracti32x4_mask:
37577 case CODE_FOR_sse2_shufpd:
37578 case CODE_FOR_sse2_shufpd_mask:
37579 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37580 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37581 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37582 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37583 error ("the last argument must be a 2-bit immediate");
37584 return const0_rtx;
37586 case CODE_FOR_avx_vextractf128v4df:
37587 case CODE_FOR_avx_vextractf128v8sf:
37588 case CODE_FOR_avx_vextractf128v8si:
37589 case CODE_FOR_avx_vinsertf128v4df:
37590 case CODE_FOR_avx_vinsertf128v8sf:
37591 case CODE_FOR_avx_vinsertf128v8si:
37592 case CODE_FOR_avx512f_vinsertf64x4_mask:
37593 case CODE_FOR_avx512f_vinserti64x4_mask:
37594 case CODE_FOR_avx512f_vextractf64x4_mask:
37595 case CODE_FOR_avx512f_vextracti64x4_mask:
37596 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37597 case CODE_FOR_avx512dq_vinserti32x8_mask:
37598 case CODE_FOR_avx512vl_vinsertv4df:
37599 case CODE_FOR_avx512vl_vinsertv4di:
37600 case CODE_FOR_avx512vl_vinsertv8sf:
37601 case CODE_FOR_avx512vl_vinsertv8si:
37602 error ("the last argument must be a 1-bit immediate");
37603 return const0_rtx;
37605 case CODE_FOR_avx_vmcmpv2df3:
37606 case CODE_FOR_avx_vmcmpv4sf3:
37607 case CODE_FOR_avx_cmpv2df3:
37608 case CODE_FOR_avx_cmpv4sf3:
37609 case CODE_FOR_avx_cmpv4df3:
37610 case CODE_FOR_avx_cmpv8sf3:
37611 case CODE_FOR_avx512f_cmpv8df3_mask:
37612 case CODE_FOR_avx512f_cmpv16sf3_mask:
37613 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37614 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37615 error ("the last argument must be a 5-bit immediate");
37616 return const0_rtx;
37618 default:
37619 switch (nargs_constant)
37621 case 2:
37622 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
37623 || (!mask_pos && (nargs - i) == nargs_constant))
37625 error ("the next to last argument must be an 8-bit immediate");
37626 break;
37628 case 1:
37629 error ("the last argument must be an 8-bit immediate");
37630 break;
37631 default:
37632 gcc_unreachable ();
37634 return const0_rtx;
37637 else
37639 if (VECTOR_MODE_P (mode))
37640 op = safe_vector_operand (op, mode);
37642 /* If we aren't optimizing, only allow one memory operand to
37643 be generated. */
37644 if (memory_operand (op, mode))
37645 num_memory++;
37647 op = fixup_modeless_constant (op, mode);
37649 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37651 if (optimize || !match || num_memory > 1)
37652 op = copy_to_mode_reg (mode, op);
37654 else
37656 op = copy_to_reg (op);
37657 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37661 args[i].op = op;
37662 args[i].mode = mode;
37665 switch (nargs)
37667 case 1:
37668 pat = GEN_FCN (icode) (real_target, args[0].op);
37669 break;
37670 case 2:
37671 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37672 break;
37673 case 3:
37674 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37675 args[2].op);
37676 break;
37677 case 4:
37678 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37679 args[2].op, args[3].op);
37680 break;
37681 case 5:
37682 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37683 args[2].op, args[3].op, args[4].op);
break;
37684 case 6:
37685 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37686 args[2].op, args[3].op, args[4].op,
37687 args[5].op);
37688 break;
37689 default:
37690 gcc_unreachable ();
37693 if (! pat)
37694 return 0;
37696 emit_insn (pat);
37697 return target;
37700 /* Transform a pattern of the following layout:
37701 (parallel [
37702 set (A B)
37703 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37705 into:
37706 (set (A B))
37709 or a pattern:
37711 (parallel [ A B ... (unspec [C] UNSPEC_EMBEDDED_ROUNDING) ... ])
37714 into:
37715 (parallel [ A B ... ]) */
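/* The UNSPEC_EMBEDDED_ROUNDING member exists only to request the AVX-512
   embedded rounding/SAE encoding; when the caller passed NO_ROUND it is
   erased so that the insn matches the ordinary non-rounding pattern. */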
37717 static rtx
37718 ix86_erase_embedded_rounding (rtx pat)
37720 if (GET_CODE (pat) == INSN)
37721 pat = PATTERN (pat);
37723 gcc_assert (GET_CODE (pat) == PARALLEL);
37725 if (XVECLEN (pat, 0) == 2)
37727 rtx p0 = XVECEXP (pat, 0, 0);
37728 rtx p1 = XVECEXP (pat, 0, 1);
37730 gcc_assert (GET_CODE (p0) == SET
37731 && GET_CODE (p1) == UNSPEC
37732 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37734 return p0;
37736 else
37738 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37739 int i = 0;
37740 int j = 0;
37742 for (; i < XVECLEN (pat, 0); ++i)
37744 rtx elem = XVECEXP (pat, 0, i);
37745 if (GET_CODE (elem) != UNSPEC
37746 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37747 res [j++] = elem;
37750 /* No more than 1 occurrence was removed. */
37751 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37753 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37757 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37758 with rounding. */
37759 static rtx
37760 ix86_expand_sse_comi_round (const struct builtin_description *d,
37761 tree exp, rtx target)
37763 rtx pat, set_dst;
37764 tree arg0 = CALL_EXPR_ARG (exp, 0);
37765 tree arg1 = CALL_EXPR_ARG (exp, 1);
37766 tree arg2 = CALL_EXPR_ARG (exp, 2);
37767 tree arg3 = CALL_EXPR_ARG (exp, 3);
37768 rtx op0 = expand_normal (arg0);
37769 rtx op1 = expand_normal (arg1);
37770 rtx op2 = expand_normal (arg2);
37771 rtx op3 = expand_normal (arg3);
37772 enum insn_code icode = d->icode;
37773 const struct insn_data_d *insn_p = &insn_data[icode];
37774 machine_mode mode0 = insn_p->operand[0].mode;
37775 machine_mode mode1 = insn_p->operand[1].mode;
37776 enum rtx_code comparison = UNEQ;
37777 bool need_ucomi = false;
37779 /* See avxintrin.h for values. */
37780 enum rtx_code comi_comparisons[32] =
37782 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37783 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37784 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37786 bool need_ucomi_values[32] =
37788 true, false, false, true, true, false, false, true,
37789 true, false, false, true, true, false, false, true,
37790 false, true, true, false, false, true, true, false,
37791 false, true, true, false, false, true, true, false
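/* For example, with the avxintrin.h encodings: predicate 0x00
   (_CMP_EQ_OQ, quiet) yields UNEQ via the non-signaling UCOMI pattern,
   while 0x10 (_CMP_EQ_OS, signaling) also yields UNEQ but via COMI. */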
37794 if (!CONST_INT_P (op2))
37796 error ("the third argument must be comparison constant");
37797 return const0_rtx;
37799 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37801 error ("incorrect comparison mode");
37802 return const0_rtx;
37805 if (!insn_p->operand[2].predicate (op3, SImode))
37807 error ("incorrect rounding operand");
37808 return const0_rtx;
37811 comparison = comi_comparisons[INTVAL (op2)];
37812 need_ucomi = need_ucomi_values[INTVAL (op2)];
37814 if (VECTOR_MODE_P (mode0))
37815 op0 = safe_vector_operand (op0, mode0);
37816 if (VECTOR_MODE_P (mode1))
37817 op1 = safe_vector_operand (op1, mode1);
37819 target = gen_reg_rtx (SImode);
37820 emit_move_insn (target, const0_rtx);
37821 target = gen_rtx_SUBREG (QImode, target, 0);
37823 if ((optimize && !register_operand (op0, mode0))
37824 || !insn_p->operand[0].predicate (op0, mode0))
37825 op0 = copy_to_mode_reg (mode0, op0);
37826 if ((optimize && !register_operand (op1, mode1))
37827 || !insn_p->operand[1].predicate (op1, mode1))
37828 op1 = copy_to_mode_reg (mode1, op1);
37830 if (need_ucomi)
37831 icode = icode == CODE_FOR_sse_comi_round
37832 ? CODE_FOR_sse_ucomi_round
37833 : CODE_FOR_sse2_ucomi_round;
37835 pat = GEN_FCN (icode) (op0, op1, op3);
37836 if (! pat)
37837 return 0;
37839 /* The rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37840 if (INTVAL (op3) == NO_ROUND)
37842 pat = ix86_erase_embedded_rounding (pat);
37843 if (! pat)
37844 return 0;
37846 set_dst = SET_DEST (pat);
37848 else
37850 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37851 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37854 emit_insn (pat);
37855 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37856 gen_rtx_fmt_ee (comparison, QImode,
37857 set_dst,
37858 const0_rtx)));
37860 return SUBREG_REG (target);
37863 static rtx
37864 ix86_expand_round_builtin (const struct builtin_description *d,
37865 tree exp, rtx target)
37867 rtx pat;
37868 unsigned int i, nargs;
37869 struct
37871 rtx op;
37872 machine_mode mode;
37873 } args[6];
37874 enum insn_code icode = d->icode;
37875 const struct insn_data_d *insn_p = &insn_data[icode];
37876 machine_mode tmode = insn_p->operand[0].mode;
37877 unsigned int nargs_constant = 0;
37878 unsigned int redundant_embed_rnd = 0;
37880 switch ((enum ix86_builtin_func_type) d->flag)
37882 case UINT64_FTYPE_V2DF_INT:
37883 case UINT64_FTYPE_V4SF_INT:
37884 case UINT_FTYPE_V2DF_INT:
37885 case UINT_FTYPE_V4SF_INT:
37886 case INT64_FTYPE_V2DF_INT:
37887 case INT64_FTYPE_V4SF_INT:
37888 case INT_FTYPE_V2DF_INT:
37889 case INT_FTYPE_V4SF_INT:
37890 nargs = 2;
37891 break;
37892 case V4SF_FTYPE_V4SF_UINT_INT:
37893 case V4SF_FTYPE_V4SF_UINT64_INT:
37894 case V2DF_FTYPE_V2DF_UINT64_INT:
37895 case V4SF_FTYPE_V4SF_INT_INT:
37896 case V4SF_FTYPE_V4SF_INT64_INT:
37897 case V2DF_FTYPE_V2DF_INT64_INT:
37898 case V4SF_FTYPE_V4SF_V4SF_INT:
37899 case V2DF_FTYPE_V2DF_V2DF_INT:
37900 case V4SF_FTYPE_V4SF_V2DF_INT:
37901 case V2DF_FTYPE_V2DF_V4SF_INT:
37902 nargs = 3;
37903 break;
37904 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37905 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37906 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37907 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37908 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37909 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37910 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37911 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37912 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37913 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37914 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37915 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37916 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37917 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37918 nargs = 4;
37919 break;
37920 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37921 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37922 nargs_constant = 2;
37923 nargs = 4;
37924 break;
37925 case INT_FTYPE_V4SF_V4SF_INT_INT:
37926 case INT_FTYPE_V2DF_V2DF_INT_INT:
37927 return ix86_expand_sse_comi_round (d, exp, target);
37928 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
37929 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37930 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37931 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37932 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37933 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37934 nargs = 5;
37935 break;
37936 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37937 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37938 nargs_constant = 4;
37939 nargs = 5;
37940 break;
37941 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
37942 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
37943 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
37944 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
37945 nargs_constant = 3;
37946 nargs = 5;
37947 break;
37948 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37949 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37950 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37951 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37952 nargs = 6;
37953 nargs_constant = 4;
37954 break;
37955 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37956 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37957 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37958 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37959 nargs = 6;
37960 nargs_constant = 3;
37961 break;
37962 default:
37963 gcc_unreachable ();
37965 gcc_assert (nargs <= ARRAY_SIZE (args));
37967 if (optimize
37968 || target == 0
37969 || GET_MODE (target) != tmode
37970 || !insn_p->operand[0].predicate (target, tmode))
37971 target = gen_reg_rtx (tmode);
37973 for (i = 0; i < nargs; i++)
37975 tree arg = CALL_EXPR_ARG (exp, i);
37976 rtx op = expand_normal (arg);
37977 machine_mode mode = insn_p->operand[i + 1].mode;
37978 bool match = insn_p->operand[i + 1].predicate (op, mode);
37980 if (i == nargs - nargs_constant)
37982 if (!match)
37984 switch (icode)
37986 case CODE_FOR_avx512f_getmantv8df_mask_round:
37987 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37988 case CODE_FOR_avx512f_vgetmantv2df_round:
37989 case CODE_FOR_avx512f_vgetmantv4sf_round:
37990 error ("the immediate argument must be a 4-bit immediate");
37991 return const0_rtx;
37992 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37993 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37994 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37995 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37996 error ("the immediate argument must be a 5-bit immediate");
37997 return const0_rtx;
37998 default:
37999 error ("the immediate argument must be an 8-bit immediate");
38000 return const0_rtx;
38004 else if (i == nargs-1)
38006 if (!insn_p->operand[nargs].predicate (op, SImode))
38008 error ("incorrect rounding operand");
38009 return const0_rtx;
38012 /* If there is no rounding, use the normal version of the pattern. */
38013 if (INTVAL (op) == NO_ROUND)
38014 redundant_embed_rnd = 1;
38016 else
38018 if (VECTOR_MODE_P (mode))
38019 op = safe_vector_operand (op, mode);
38021 op = fixup_modeless_constant (op, mode);
38023 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38025 if (optimize || !match)
38026 op = copy_to_mode_reg (mode, op);
38028 else
38030 op = copy_to_reg (op);
38031 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38035 args[i].op = op;
38036 args[i].mode = mode;
38039 switch (nargs)
38041 case 1:
38042 pat = GEN_FCN (icode) (target, args[0].op);
38043 break;
38044 case 2:
38045 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38046 break;
38047 case 3:
38048 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38049 args[2].op);
38050 break;
38051 case 4:
38052 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38053 args[2].op, args[3].op);
38054 break;
38055 case 5:
38056 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38057 args[2].op, args[3].op, args[4].op);
break;
38058 case 6:
38059 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38060 args[2].op, args[3].op, args[4].op,
38061 args[5].op);
38062 break;
38063 default:
38064 gcc_unreachable ();
38067 if (!pat)
38068 return 0;
38070 if (redundant_embed_rnd)
38071 pat = ix86_erase_embedded_rounding (pat);
38073 emit_insn (pat);
38074 return target;
38077 /* Subroutine of ix86_expand_builtin to take care of special insns
38078 with a variable number of operands. */
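/* These cover the memory-touching builtins (non-temporal moves, masked
   loads and stores, ...) as well as nullary state reads/writes; klass
   selects load vs. store expansion and `memory' is the index of the
   memory operand among the arguments. */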
38080 static rtx
38081 ix86_expand_special_args_builtin (const struct builtin_description *d,
38082 tree exp, rtx target)
38084 tree arg;
38085 rtx pat, op;
38086 unsigned int i, nargs, arg_adjust, memory;
38087 bool aligned_mem = false;
38088 struct
38090 rtx op;
38091 machine_mode mode;
38092 } args[3];
38093 enum insn_code icode = d->icode;
38094 bool last_arg_constant = false;
38095 const struct insn_data_d *insn_p = &insn_data[icode];
38096 machine_mode tmode = insn_p->operand[0].mode;
38097 enum { load, store } klass;
38099 switch ((enum ix86_builtin_func_type) d->flag)
38101 case VOID_FTYPE_VOID:
38102 emit_insn (GEN_FCN (icode) (target));
38103 return 0;
38104 case VOID_FTYPE_UINT64:
38105 case VOID_FTYPE_UNSIGNED:
38106 nargs = 0;
38107 klass = store;
38108 memory = 0;
38109 break;
38111 case INT_FTYPE_VOID:
38112 case USHORT_FTYPE_VOID:
38113 case UINT64_FTYPE_VOID:
38114 case UNSIGNED_FTYPE_VOID:
38115 nargs = 0;
38116 klass = load;
38117 memory = 0;
38118 break;
38119 case UINT64_FTYPE_PUNSIGNED:
38120 case V2DI_FTYPE_PV2DI:
38121 case V4DI_FTYPE_PV4DI:
38122 case V32QI_FTYPE_PCCHAR:
38123 case V16QI_FTYPE_PCCHAR:
38124 case V8SF_FTYPE_PCV4SF:
38125 case V8SF_FTYPE_PCFLOAT:
38126 case V4SF_FTYPE_PCFLOAT:
38127 case V4DF_FTYPE_PCV2DF:
38128 case V4DF_FTYPE_PCDOUBLE:
38129 case V2DF_FTYPE_PCDOUBLE:
38130 case VOID_FTYPE_PVOID:
38131 case V8DI_FTYPE_PV8DI:
38132 nargs = 1;
38133 klass = load;
38134 memory = 0;
38135 switch (icode)
38137 case CODE_FOR_sse4_1_movntdqa:
38138 case CODE_FOR_avx2_movntdqa:
38139 case CODE_FOR_avx512f_movntdqa:
38140 aligned_mem = true;
38141 break;
38142 default:
38143 break;
38145 break;
38146 case VOID_FTYPE_PV2SF_V4SF:
38147 case VOID_FTYPE_PV8DI_V8DI:
38148 case VOID_FTYPE_PV4DI_V4DI:
38149 case VOID_FTYPE_PV2DI_V2DI:
38150 case VOID_FTYPE_PCHAR_V32QI:
38151 case VOID_FTYPE_PCHAR_V16QI:
38152 case VOID_FTYPE_PFLOAT_V16SF:
38153 case VOID_FTYPE_PFLOAT_V8SF:
38154 case VOID_FTYPE_PFLOAT_V4SF:
38155 case VOID_FTYPE_PDOUBLE_V8DF:
38156 case VOID_FTYPE_PDOUBLE_V4DF:
38157 case VOID_FTYPE_PDOUBLE_V2DF:
38158 case VOID_FTYPE_PLONGLONG_LONGLONG:
38159 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38160 case VOID_FTYPE_PINT_INT:
38161 nargs = 1;
38162 klass = store;
38163 /* Reserve memory operand for target. */
38164 memory = ARRAY_SIZE (args);
38165 switch (icode)
38167 /* These builtins and instructions require the memory
38168 to be properly aligned. */
38169 case CODE_FOR_avx_movntv4di:
38170 case CODE_FOR_sse2_movntv2di:
38171 case CODE_FOR_avx_movntv8sf:
38172 case CODE_FOR_sse_movntv4sf:
38173 case CODE_FOR_sse4a_vmmovntv4sf:
38174 case CODE_FOR_avx_movntv4df:
38175 case CODE_FOR_sse2_movntv2df:
38176 case CODE_FOR_sse4a_vmmovntv2df:
38177 case CODE_FOR_sse2_movntidi:
38178 case CODE_FOR_sse_movntq:
38179 case CODE_FOR_sse2_movntisi:
38180 case CODE_FOR_avx512f_movntv16sf:
38181 case CODE_FOR_avx512f_movntv8df:
38182 case CODE_FOR_avx512f_movntv8di:
38183 aligned_mem = true;
38184 break;
38185 default:
38186 break;
38188 break;
38189 case V4SF_FTYPE_V4SF_PCV2SF:
38190 case V2DF_FTYPE_V2DF_PCDOUBLE:
38191 nargs = 2;
38192 klass = load;
38193 memory = 1;
38194 break;
38195 case V8SF_FTYPE_PCV8SF_V8SI:
38196 case V4DF_FTYPE_PCV4DF_V4DI:
38197 case V4SF_FTYPE_PCV4SF_V4SI:
38198 case V2DF_FTYPE_PCV2DF_V2DI:
38199 case V8SI_FTYPE_PCV8SI_V8SI:
38200 case V4DI_FTYPE_PCV4DI_V4DI:
38201 case V4SI_FTYPE_PCV4SI_V4SI:
38202 case V2DI_FTYPE_PCV2DI_V2DI:
38203 nargs = 2;
38204 klass = load;
38205 memory = 0;
38206 break;
38207 case VOID_FTYPE_PV8DF_V8DF_UQI:
38208 case VOID_FTYPE_PV16SF_V16SF_UHI:
38209 case VOID_FTYPE_PV8DI_V8DI_UQI:
38210 case VOID_FTYPE_PV4DI_V4DI_UQI:
38211 case VOID_FTYPE_PV2DI_V2DI_UQI:
38212 case VOID_FTYPE_PV16SI_V16SI_UHI:
38213 case VOID_FTYPE_PV8SI_V8SI_UQI:
38214 case VOID_FTYPE_PV4SI_V4SI_UQI:
38215 switch (icode)
38217 /* These builtins and instructions require the memory
38218 to be properly aligned. */
38219 case CODE_FOR_avx512f_storev16sf_mask:
38220 case CODE_FOR_avx512f_storev16si_mask:
38221 case CODE_FOR_avx512f_storev8df_mask:
38222 case CODE_FOR_avx512f_storev8di_mask:
38223 case CODE_FOR_avx512vl_storev8sf_mask:
38224 case CODE_FOR_avx512vl_storev8si_mask:
38225 case CODE_FOR_avx512vl_storev4df_mask:
38226 case CODE_FOR_avx512vl_storev4di_mask:
38227 case CODE_FOR_avx512vl_storev4sf_mask:
38228 case CODE_FOR_avx512vl_storev4si_mask:
38229 case CODE_FOR_avx512vl_storev2df_mask:
38230 case CODE_FOR_avx512vl_storev2di_mask:
38231 aligned_mem = true;
38232 break;
38233 default:
38234 break;
38236 /* FALLTHRU */
38237 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38238 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38239 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38240 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38241 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38242 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38243 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38244 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38245 case VOID_FTYPE_PV8SI_V8DI_UQI:
38246 case VOID_FTYPE_PV8HI_V8DI_UQI:
38247 case VOID_FTYPE_PV16HI_V16SI_UHI:
38248 case VOID_FTYPE_PV16QI_V8DI_UQI:
38249 case VOID_FTYPE_PV16QI_V16SI_UHI:
38250 case VOID_FTYPE_PV4SI_V4DI_UQI:
38251 case VOID_FTYPE_PV4SI_V2DI_UQI:
38252 case VOID_FTYPE_PV8HI_V4DI_UQI:
38253 case VOID_FTYPE_PV8HI_V2DI_UQI:
38254 case VOID_FTYPE_PV8HI_V8SI_UQI:
38255 case VOID_FTYPE_PV8HI_V4SI_UQI:
38256 case VOID_FTYPE_PV16QI_V4DI_UQI:
38257 case VOID_FTYPE_PV16QI_V2DI_UQI:
38258 case VOID_FTYPE_PV16QI_V8SI_UQI:
38259 case VOID_FTYPE_PV16QI_V4SI_UQI:
38260 case VOID_FTYPE_PV8HI_V8HI_UQI:
38261 case VOID_FTYPE_PV16HI_V16HI_UHI:
38262 case VOID_FTYPE_PV32HI_V32HI_USI:
38263 case VOID_FTYPE_PV16QI_V16QI_UHI:
38264 case VOID_FTYPE_PV32QI_V32QI_USI:
38265 case VOID_FTYPE_PV64QI_V64QI_UDI:
38266 case VOID_FTYPE_PV4DF_V4DF_UQI:
38267 case VOID_FTYPE_PV2DF_V2DF_UQI:
38268 case VOID_FTYPE_PV8SF_V8SF_UQI:
38269 case VOID_FTYPE_PV4SF_V4SF_UQI:
38270 nargs = 2;
38271 klass = store;
38272 /* Reserve memory operand for target. */
38273 memory = ARRAY_SIZE (args);
38274 break;
38275 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
38276 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
38277 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
38278 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
38279 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
38280 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
38281 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
38282 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
38283 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
38284 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
38285 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
38286 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
38287 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
38288 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
38289 case V32HI_FTYPE_PCV32HI_V32HI_USI:
38290 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
38291 case V32QI_FTYPE_PCV32QI_V32QI_USI:
38292 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
38293 nargs = 3;
38294 klass = load;
38295 memory = 0;
38296 switch (icode)
38298 /* These builtins and instructions require the memory
38299 to be properly aligned. */
38300 case CODE_FOR_avx512f_loadv16sf_mask:
38301 case CODE_FOR_avx512f_loadv16si_mask:
38302 case CODE_FOR_avx512f_loadv8df_mask:
38303 case CODE_FOR_avx512f_loadv8di_mask:
38304 case CODE_FOR_avx512vl_loadv8sf_mask:
38305 case CODE_FOR_avx512vl_loadv8si_mask:
38306 case CODE_FOR_avx512vl_loadv4df_mask:
38307 case CODE_FOR_avx512vl_loadv4di_mask:
38308 case CODE_FOR_avx512vl_loadv4sf_mask:
38309 case CODE_FOR_avx512vl_loadv4si_mask:
38310 case CODE_FOR_avx512vl_loadv2df_mask:
38311 case CODE_FOR_avx512vl_loadv2di_mask:
38312 case CODE_FOR_avx512bw_loadv64qi_mask:
38313 case CODE_FOR_avx512vl_loadv32qi_mask:
38314 case CODE_FOR_avx512vl_loadv16qi_mask:
38315 case CODE_FOR_avx512bw_loadv32hi_mask:
38316 case CODE_FOR_avx512vl_loadv16hi_mask:
38317 case CODE_FOR_avx512vl_loadv8hi_mask:
38318 aligned_mem = true;
38319 break;
38320 default:
38321 break;
38323 break;
38324 case VOID_FTYPE_UINT_UINT_UINT:
38325 case VOID_FTYPE_UINT64_UINT_UINT:
38326 case UCHAR_FTYPE_UINT_UINT_UINT:
38327 case UCHAR_FTYPE_UINT64_UINT_UINT:
38328 nargs = 3;
38329 klass = load;
38330 memory = ARRAY_SIZE (args);
38331 last_arg_constant = true;
38332 break;
38333 default:
38334 gcc_unreachable ();
38337 gcc_assert (nargs <= ARRAY_SIZE (args));
38339 if (klass == store)
38341 arg = CALL_EXPR_ARG (exp, 0);
38342 op = expand_normal (arg);
38343 gcc_assert (target == 0);
38344 if (memory)
38346 op = ix86_zero_extend_to_Pmode (op);
38347 target = gen_rtx_MEM (tmode, op);
38348 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38349 on it. Try to improve it using get_pointer_alignment,
38350 and if the special builtin is one that requires strict
38351 mode alignment, also from its GET_MODE_ALIGNMENT.
38352 Failure to do so could lead to ix86_legitimate_combined_insn
38353 rejecting all changes to such insns. */
38354 unsigned int align = get_pointer_alignment (arg);
38355 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38356 align = GET_MODE_ALIGNMENT (tmode);
38357 if (MEM_ALIGN (target) < align)
38358 set_mem_align (target, align);
38360 else
38361 target = force_reg (tmode, op);
38362 arg_adjust = 1;
38364 else
38366 arg_adjust = 0;
38367 if (optimize
38368 || target == 0
38369 || !register_operand (target, tmode)
38370 || GET_MODE (target) != tmode)
38371 target = gen_reg_rtx (tmode);
38374 for (i = 0; i < nargs; i++)
38376 machine_mode mode = insn_p->operand[i + 1].mode;
38377 bool match;
38379 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38380 op = expand_normal (arg);
38381 match = insn_p->operand[i + 1].predicate (op, mode);
38383 if (last_arg_constant && (i + 1) == nargs)
38385 if (!match)
38387 if (icode == CODE_FOR_lwp_lwpvalsi3
38388 || icode == CODE_FOR_lwp_lwpinssi3
38389 || icode == CODE_FOR_lwp_lwpvaldi3
38390 || icode == CODE_FOR_lwp_lwpinsdi3)
38391 error ("the last argument must be a 32-bit immediate");
38392 else
38393 error ("the last argument must be an 8-bit immediate");
38394 return const0_rtx;
38397 else
38399 if (i == memory)
38401 /* This must be the memory operand. */
38402 op = ix86_zero_extend_to_Pmode (op);
38403 op = gen_rtx_MEM (mode, op);
38404 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38405 on it. Try to improve it using get_pointer_alignment,
38406 and if the special builtin is one that requires strict
38407 mode alignment, also from its GET_MODE_ALIGNMENT.
38408 Failure to do so could lead to ix86_legitimate_combined_insn
38409 rejecting all changes to such insns. */
38410 unsigned int align = get_pointer_alignment (arg);
38411 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38412 align = GET_MODE_ALIGNMENT (mode);
38413 if (MEM_ALIGN (op) < align)
38414 set_mem_align (op, align);
38416 else
38418 /* This must be a register. */
38419 if (VECTOR_MODE_P (mode))
38420 op = safe_vector_operand (op, mode);
38422 op = fixup_modeless_constant (op, mode);
38424 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38425 op = copy_to_mode_reg (mode, op);
38426 else
38428 op = copy_to_reg (op);
38429 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38434 args[i].op = op;
38435 args[i].mode = mode;
38438 switch (nargs)
38440 case 0:
38441 pat = GEN_FCN (icode) (target);
38442 break;
38443 case 1:
38444 pat = GEN_FCN (icode) (target, args[0].op);
38445 break;
38446 case 2:
38447 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38448 break;
38449 case 3:
38450 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38451 break;
38452 default:
38453 gcc_unreachable ();
38456 if (! pat)
38457 return 0;
38458 emit_insn (pat);
38459 return klass == store ? 0 : target;
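/* Illustrative usage (not part of GCC itself): the aligned_mem
   handling above matters for the non-temporal store builtins, whose
   instructions fault on misaligned addresses.  A minimal sketch,
   assuming the usual xmmintrin.h wrapper:

     #include <xmmintrin.h>

     void
     stream_store (float *p, __m128 v)
     {
       // p must be 16-byte aligned: movntps requires it, and the
       // expander raises MEM_ALIGN so later RTL passes can rely on it.
       _mm_stream_ps (p, v);
     }
*/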
38462 /* Return the integer constant in ARG. Constrain it to be in the range
38463 of the subparts of VEC_TYPE; issue an error if not. */
38465 static int
38466 get_element_number (tree vec_type, tree arg)
38468 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38470 if (!tree_fits_uhwi_p (arg)
38471 || (elt = tree_to_uhwi (arg), elt > max))
38473 error ("selector must be an integer constant in the range 0..%wi", max);
38474 return 0;
38477 return elt;
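/* For example, with a V4SF argument the valid selectors are 0..3; a
   call such as __builtin_ia32_vec_ext_v4sf (x, 7) is diagnosed here
   and then expands using element 0.  */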
38480 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38481 ix86_expand_vector_init. We DO have language-level syntax for this, in
38482 the form of (type){ init-list }. Except that since we can't place emms
38483 instructions from inside the compiler, we can't allow the use of MMX
38484 registers unless the user explicitly asks for it. So we do *not* define
38485 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38486 we have builtins invoked by mmintrin.h that give us license to emit
38487 these sorts of instructions. */
38489 static rtx
38490 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38492 machine_mode tmode = TYPE_MODE (type);
38493 machine_mode inner_mode = GET_MODE_INNER (tmode);
38494 int i, n_elt = GET_MODE_NUNITS (tmode);
38495 rtvec v = rtvec_alloc (n_elt);
38497 gcc_assert (VECTOR_MODE_P (tmode));
38498 gcc_assert (call_expr_nargs (exp) == n_elt);
38500 for (i = 0; i < n_elt; ++i)
38502 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38503 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38506 if (!target || !register_operand (target, tmode))
38507 target = gen_reg_rtx (tmode);
38509 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38510 return target;
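/* Illustrative usage (not part of GCC itself): mmintrin.h routes MMX
   vector construction through these builtins.  A minimal sketch,
   assuming the usual wrapper:

     #include <mmintrin.h>

     __m64
     make_v4hi (void)
     {
       // Expands through IX86_BUILTIN_VEC_INIT_V4HI instead of a
       // generic vector constructor, so MMX registers are used only
       // because the user asked for them.
       return _mm_set_pi16 (4, 3, 2, 1);
     }
*/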
38513 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38514 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38515 had a language-level syntax for referencing vector elements. */
38517 static rtx
38518 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38520 machine_mode tmode, mode0;
38521 tree arg0, arg1;
38522 int elt;
38523 rtx op0;
38525 arg0 = CALL_EXPR_ARG (exp, 0);
38526 arg1 = CALL_EXPR_ARG (exp, 1);
38528 op0 = expand_normal (arg0);
38529 elt = get_element_number (TREE_TYPE (arg0), arg1);
38531 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38532 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38533 gcc_assert (VECTOR_MODE_P (mode0));
38535 op0 = force_reg (mode0, op0);
38537 if (optimize || !target || !register_operand (target, tmode))
38538 target = gen_reg_rtx (tmode);
38540 ix86_expand_vector_extract (true, target, op0, elt);
38542 return target;
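/* Illustrative usage (not part of GCC itself), assuming the usual
   wrapper (the SSE intrinsic _mm_extract_pi16):

     #include <xmmintrin.h>

     int
     second_halfword (__m64 v)
     {
       // The selector must be a constant in 0..3; get_element_number
       // above enforces that.
       return _mm_extract_pi16 (v, 1);
     }
*/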
38545 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38546 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38547 a language-level syntax for referencing vector elements. */
38549 static rtx
38550 ix86_expand_vec_set_builtin (tree exp)
38552 machine_mode tmode, mode1;
38553 tree arg0, arg1, arg2;
38554 int elt;
38555 rtx op0, op1, target;
38557 arg0 = CALL_EXPR_ARG (exp, 0);
38558 arg1 = CALL_EXPR_ARG (exp, 1);
38559 arg2 = CALL_EXPR_ARG (exp, 2);
38561 tmode = TYPE_MODE (TREE_TYPE (arg0));
38562 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38563 gcc_assert (VECTOR_MODE_P (tmode));
38565 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38566 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38567 elt = get_element_number (TREE_TYPE (arg0), arg2);
38569 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38570 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38572 op0 = force_reg (tmode, op0);
38573 op1 = force_reg (mode1, op1);
38575 /* OP0 is the source of these builtin functions and shouldn't be
38576 modified. Create a copy, use it and return it as target. */
38577 target = gen_reg_rtx (tmode);
38578 emit_move_insn (target, op0);
38579 ix86_expand_vector_set (true, target, op1, elt);
38581 return target;
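/* Illustrative usage (not part of GCC itself): note the copy made
   above -- the input vector is left unmodified and the updated copy
   is returned, matching the intrinsic's value semantics.  A minimal
   sketch, assuming the usual wrapper:

     #include <xmmintrin.h>

     __m64
     set_third_halfword (__m64 v, int x)
     {
       return _mm_insert_pi16 (v, x, 2);  // v itself is unchanged
     }
*/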
38584 /* Emit conditional move of SRC to DST with condition
38585 OP1 CODE OP2. */
38586 static void
38587 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38589 rtx t;
38591 if (TARGET_CMOVE)
38593 t = ix86_expand_compare (code, op1, op2);
38594 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38595 src, dst)));
38597 else
38599 rtx_code_label *nomove = gen_label_rtx ();
38600 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38601 const0_rtx, GET_MODE (op1), 1, nomove);
38602 emit_move_insn (dst, src);
38603 emit_label (nomove);
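/* With TARGET_CMOVE this is a compare plus a conditional move of SRC
   into DST.  The fallback emits the reversed comparison and a jump
   around a plain move, schematically:

       if (!(op1 <code> op2)) goto nomove;
       dst = src;
     nomove:

   Either way DST keeps its old value when the condition is false.  */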
38607 /* Choose max of DST and SRC and put it to DST. */
38608 static void
38609 ix86_emit_move_max (rtx dst, rtx src)
38611 ix86_emit_cmove (dst, src, LTU, dst, src);
38614 /* Expand an expression EXP that calls a built-in function,
38615 with result going to TARGET if that's convenient
38616 (and in mode MODE if that's convenient).
38617 SUBTARGET may be used as the target for computing one of EXP's operands.
38618 IGNORE is nonzero if the value is to be ignored. */
38620 static rtx
38621 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38622 machine_mode mode, int ignore)
38624 const struct builtin_description *d;
38625 size_t i;
38626 enum insn_code icode;
38627 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38628 tree arg0, arg1, arg2, arg3, arg4;
38629 rtx op0, op1, op2, op3, op4, pat, insn;
38630 machine_mode mode0, mode1, mode2, mode3, mode4;
38631 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38633 /* For CPU builtins that can be folded, fold first and expand the fold. */
38634 switch (fcode)
38636 case IX86_BUILTIN_CPU_INIT:
38638 /* Make it call __cpu_indicator_init in libgcc. */
38639 tree call_expr, fndecl, type;
38640 type = build_function_type_list (integer_type_node, NULL_TREE);
38641 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38642 call_expr = build_call_expr (fndecl, 0);
38643 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38645 case IX86_BUILTIN_CPU_IS:
38646 case IX86_BUILTIN_CPU_SUPPORTS:
38648 tree arg0 = CALL_EXPR_ARG (exp, 0);
38649 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38650 gcc_assert (fold_expr != NULL_TREE);
38651 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38655 /* Determine whether the builtin function is available under the current ISA.
38656 Originally the builtin was not created if it wasn't applicable to the
38657 current ISA based on the command line switches. With function specific
38658 options, we need to check in the context of the function making the call
38659 whether it is supported. */
38660 if (ix86_builtins_isa[fcode].isa
38661 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38663 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38664 NULL, (enum fpmath_unit) 0, false);
38666 if (!opts)
38667 error ("%qE needs unknown isa option", fndecl);
38668 else
38670 gcc_assert (opts != NULL);
38671 error ("%qE needs isa option %s", fndecl, opts);
38672 free (opts);
38674 return const0_rtx;
38677 switch (fcode)
38679 case IX86_BUILTIN_BNDMK:
38680 if (!target
38681 || GET_MODE (target) != BNDmode
38682 || !register_operand (target, BNDmode))
38683 target = gen_reg_rtx (BNDmode);
38685 arg0 = CALL_EXPR_ARG (exp, 0);
38686 arg1 = CALL_EXPR_ARG (exp, 1);
38688 op0 = expand_normal (arg0);
38689 op1 = expand_normal (arg1);
38691 if (!register_operand (op0, Pmode))
38692 op0 = ix86_zero_extend_to_Pmode (op0);
38693 if (!register_operand (op1, Pmode))
38694 op1 = ix86_zero_extend_to_Pmode (op1);
38696 /* Builtin arg1 is the size of the block, but instruction op1 should
38697 be (size - 1). */
38698 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38699 NULL_RTX, 1, OPTAB_DIRECT);
38701 emit_insn (BNDmode == BND64mode
38702 ? gen_bnd64_mk (target, op0, op1)
38703 : gen_bnd32_mk (target, op0, op1));
38704 return target;
38706 case IX86_BUILTIN_BNDSTX:
38707 arg0 = CALL_EXPR_ARG (exp, 0);
38708 arg1 = CALL_EXPR_ARG (exp, 1);
38709 arg2 = CALL_EXPR_ARG (exp, 2);
38711 op0 = expand_normal (arg0);
38712 op1 = expand_normal (arg1);
38713 op2 = expand_normal (arg2);
38715 if (!register_operand (op0, Pmode))
38716 op0 = ix86_zero_extend_to_Pmode (op0);
38717 if (!register_operand (op1, BNDmode))
38718 op1 = copy_to_mode_reg (BNDmode, op1);
38719 if (!register_operand (op2, Pmode))
38720 op2 = ix86_zero_extend_to_Pmode (op2);
38722 emit_insn (BNDmode == BND64mode
38723 ? gen_bnd64_stx (op2, op0, op1)
38724 : gen_bnd32_stx (op2, op0, op1));
38725 return 0;
38727 case IX86_BUILTIN_BNDLDX:
38728 if (!target
38729 || GET_MODE (target) != BNDmode
38730 || !register_operand (target, BNDmode))
38731 target = gen_reg_rtx (BNDmode);
38733 arg0 = CALL_EXPR_ARG (exp, 0);
38734 arg1 = CALL_EXPR_ARG (exp, 1);
38736 op0 = expand_normal (arg0);
38737 op1 = expand_normal (arg1);
38739 if (!register_operand (op0, Pmode))
38740 op0 = ix86_zero_extend_to_Pmode (op0);
38741 if (!register_operand (op1, Pmode))
38742 op1 = ix86_zero_extend_to_Pmode (op1);
38744 emit_insn (BNDmode == BND64mode
38745 ? gen_bnd64_ldx (target, op0, op1)
38746 : gen_bnd32_ldx (target, op0, op1));
38747 return target;
38749 case IX86_BUILTIN_BNDCL:
38750 arg0 = CALL_EXPR_ARG (exp, 0);
38751 arg1 = CALL_EXPR_ARG (exp, 1);
38753 op0 = expand_normal (arg0);
38754 op1 = expand_normal (arg1);
38756 if (!register_operand (op0, Pmode))
38757 op0 = ix86_zero_extend_to_Pmode (op0);
38758 if (!register_operand (op1, BNDmode))
38759 op1 = copy_to_mode_reg (BNDmode, op1);
38761 emit_insn (BNDmode == BND64mode
38762 ? gen_bnd64_cl (op1, op0)
38763 : gen_bnd32_cl (op1, op0));
38764 return 0;
38766 case IX86_BUILTIN_BNDCU:
38767 arg0 = CALL_EXPR_ARG (exp, 0);
38768 arg1 = CALL_EXPR_ARG (exp, 1);
38770 op0 = expand_normal (arg0);
38771 op1 = expand_normal (arg1);
38773 if (!register_operand (op0, Pmode))
38774 op0 = ix86_zero_extend_to_Pmode (op0);
38775 if (!register_operand (op1, BNDmode))
38776 op1 = copy_to_mode_reg (BNDmode, op1);
38778 emit_insn (BNDmode == BND64mode
38779 ? gen_bnd64_cu (op1, op0)
38780 : gen_bnd32_cu (op1, op0));
38781 return 0;
38783 case IX86_BUILTIN_BNDRET:
38784 arg0 = CALL_EXPR_ARG (exp, 0);
38785 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38786 target = chkp_get_rtl_bounds (arg0);
38788 /* If no bounds were specified for the returned value,
38789 then use INIT bounds. This usually happens when
38790 some built-in function is expanded. */
38791 if (!target)
38793 rtx t1 = gen_reg_rtx (Pmode);
38794 rtx t2 = gen_reg_rtx (Pmode);
38795 target = gen_reg_rtx (BNDmode);
38796 emit_move_insn (t1, const0_rtx);
38797 emit_move_insn (t2, constm1_rtx);
38798 emit_insn (BNDmode == BND64mode
38799 ? gen_bnd64_mk (target, t1, t2)
38800 : gen_bnd32_mk (target, t1, t2));
38803 gcc_assert (target && REG_P (target));
38804 return target;
38806 case IX86_BUILTIN_BNDNARROW:
38808 rtx m1, m1h1, m1h2, lb, ub, t1;
38810 /* Return value and lb. */
38811 arg0 = CALL_EXPR_ARG (exp, 0);
38812 /* Bounds. */
38813 arg1 = CALL_EXPR_ARG (exp, 1);
38814 /* Size. */
38815 arg2 = CALL_EXPR_ARG (exp, 2);
38817 lb = expand_normal (arg0);
38818 op1 = expand_normal (arg1);
38819 op2 = expand_normal (arg2);
38821 /* The size was passed, but we need to use (size - 1), as for bndmk. */
38822 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38823 NULL_RTX, 1, OPTAB_DIRECT);
38825 /* Add LB to the size and invert to get UB. */
38826 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38827 op2, 1, OPTAB_DIRECT);
38828 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38830 if (!register_operand (lb, Pmode))
38831 lb = ix86_zero_extend_to_Pmode (lb);
38832 if (!register_operand (ub, Pmode))
38833 ub = ix86_zero_extend_to_Pmode (ub);
38835 /* We need to move bounds to memory before any computations. */
38836 if (MEM_P (op1))
38837 m1 = op1;
38838 else
38840 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38841 emit_move_insn (m1, op1);
38844 /* Generate mem expression to be used for access to LB and UB. */
38845 m1h1 = adjust_address (m1, Pmode, 0);
38846 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38848 t1 = gen_reg_rtx (Pmode);
38850 /* Compute LB. */
38851 emit_move_insn (t1, m1h1);
38852 ix86_emit_move_max (t1, lb);
38853 emit_move_insn (m1h1, t1);
38855 /* Compute UB. UB is stored in 1's complement form. Therefore
38856 we also use max here. */
38857 emit_move_insn (t1, m1h2);
38858 ix86_emit_move_max (t1, ub);
38859 emit_move_insn (m1h2, t1);
38861 op2 = gen_reg_rtx (BNDmode);
38862 emit_move_insn (op2, m1);
38864 return chkp_join_splitted_slot (lb, op2);
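/* Bound narrowing computes the intersection of the incoming bounds
   with [lb, lb + size - 1].  Because MPX keeps the upper bound in
   one's complement, both halves of the intersection are unsigned-max
   operations, which is why ix86_emit_move_max serves for both:
   max (lb1, lb2) raises the lower bound, and on the complemented
   values max (~ub1, ~ub2) == ~min (ub1, ub2) lowers the upper one.  */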
38867 case IX86_BUILTIN_BNDINT:
38869 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38871 if (!target
38872 || GET_MODE (target) != BNDmode
38873 || !register_operand (target, BNDmode))
38874 target = gen_reg_rtx (BNDmode);
38876 arg0 = CALL_EXPR_ARG (exp, 0);
38877 arg1 = CALL_EXPR_ARG (exp, 1);
38879 op0 = expand_normal (arg0);
38880 op1 = expand_normal (arg1);
38882 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38883 rh1 = adjust_address (res, Pmode, 0);
38884 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38886 /* Put first bounds to temporaries. */
38887 lb1 = gen_reg_rtx (Pmode);
38888 ub1 = gen_reg_rtx (Pmode);
38889 if (MEM_P (op0))
38891 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38892 emit_move_insn (ub1, adjust_address (op0, Pmode,
38893 GET_MODE_SIZE (Pmode)));
38895 else
38897 emit_move_insn (res, op0);
38898 emit_move_insn (lb1, rh1);
38899 emit_move_insn (ub1, rh2);
38902 /* Put second bounds to temporaries. */
38903 lb2 = gen_reg_rtx (Pmode);
38904 ub2 = gen_reg_rtx (Pmode);
38905 if (MEM_P (op1))
38907 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38908 emit_move_insn (ub2, adjust_address (op1, Pmode,
38909 GET_MODE_SIZE (Pmode)));
38911 else
38913 emit_move_insn (res, op1);
38914 emit_move_insn (lb2, rh1);
38915 emit_move_insn (ub2, rh2);
38918 /* Compute LB. */
38919 ix86_emit_move_max (lb1, lb2);
38920 emit_move_insn (rh1, lb1);
38922 /* Compute UB. UB is stored in 1's complement form. Therefore
38923 we also use max here. */
38924 ix86_emit_move_max (ub1, ub2);
38925 emit_move_insn (rh2, ub1);
38927 emit_move_insn (target, res);
38929 return target;
38932 case IX86_BUILTIN_SIZEOF:
38934 tree name;
38935 rtx symbol;
38937 if (!target
38938 || GET_MODE (target) != Pmode
38939 || !register_operand (target, Pmode))
38940 target = gen_reg_rtx (Pmode);
38942 arg0 = CALL_EXPR_ARG (exp, 0);
38943 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38945 name = DECL_ASSEMBLER_NAME (arg0);
38946 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38948 emit_insn (Pmode == SImode
38949 ? gen_move_size_reloc_si (target, symbol)
38950 : gen_move_size_reloc_di (target, symbol));
38952 return target;
38955 case IX86_BUILTIN_BNDLOWER:
38957 rtx mem, hmem;
38959 if (!target
38960 || GET_MODE (target) != Pmode
38961 || !register_operand (target, Pmode))
38962 target = gen_reg_rtx (Pmode);
38964 arg0 = CALL_EXPR_ARG (exp, 0);
38965 op0 = expand_normal (arg0);
38967 /* We need to move bounds to memory first. */
38968 if (MEM_P (op0))
38969 mem = op0;
38970 else
38972 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38973 emit_move_insn (mem, op0);
38976 /* Generate mem expression to access LB and load it. */
38977 hmem = adjust_address (mem, Pmode, 0);
38978 emit_move_insn (target, hmem);
38980 return target;
38983 case IX86_BUILTIN_BNDUPPER:
38985 rtx mem, hmem, res;
38987 if (!target
38988 || GET_MODE (target) != Pmode
38989 || !register_operand (target, Pmode))
38990 target = gen_reg_rtx (Pmode);
38992 arg0 = CALL_EXPR_ARG (exp, 0);
38993 op0 = expand_normal (arg0);
38995 /* We need to move bounds to memory first. */
38996 if (MEM_P (op0))
38997 mem = op0;
38998 else
39000 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39001 emit_move_insn (mem, op0);
39004 /* Generate mem expression to access UB. */
39005 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
39007 /* We need to invert all bits of UB. */
39008 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
39010 if (res != target)
39011 emit_move_insn (target, res);
39013 return target;
39016 case IX86_BUILTIN_MASKMOVQ:
39017 case IX86_BUILTIN_MASKMOVDQU:
39018 icode = (fcode == IX86_BUILTIN_MASKMOVQ
39019 ? CODE_FOR_mmx_maskmovq
39020 : CODE_FOR_sse2_maskmovdqu);
39021 /* Note the arg order is different from the operand order. */
39022 arg1 = CALL_EXPR_ARG (exp, 0);
39023 arg2 = CALL_EXPR_ARG (exp, 1);
39024 arg0 = CALL_EXPR_ARG (exp, 2);
39025 op0 = expand_normal (arg0);
39026 op1 = expand_normal (arg1);
39027 op2 = expand_normal (arg2);
39028 mode0 = insn_data[icode].operand[0].mode;
39029 mode1 = insn_data[icode].operand[1].mode;
39030 mode2 = insn_data[icode].operand[2].mode;
39032 op0 = ix86_zero_extend_to_Pmode (op0);
39033 op0 = gen_rtx_MEM (mode1, op0);
39035 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39036 op0 = copy_to_mode_reg (mode0, op0);
39037 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39038 op1 = copy_to_mode_reg (mode1, op1);
39039 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39040 op2 = copy_to_mode_reg (mode2, op2);
39041 pat = GEN_FCN (icode) (op0, op1, op2);
39042 if (! pat)
39043 return 0;
39044 emit_insn (pat);
39045 return 0;
39047 case IX86_BUILTIN_LDMXCSR:
39048 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
39049 target = assign_386_stack_local (SImode, SLOT_TEMP);
39050 emit_move_insn (target, op0);
39051 emit_insn (gen_sse_ldmxcsr (target));
39052 return 0;
39054 case IX86_BUILTIN_STMXCSR:
39055 target = assign_386_stack_local (SImode, SLOT_TEMP);
39056 emit_insn (gen_sse_stmxcsr (target));
39057 return copy_to_mode_reg (SImode, target);
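/* Illustrative usage (not part of GCC itself): these two cases back
   the MXCSR intrinsics.  A minimal sketch, assuming the usual
   xmmintrin.h wrappers:

     #include <xmmintrin.h>

     unsigned int
     enable_flush_to_zero (void)
     {
       unsigned int csr = _mm_getcsr ();      // stmxcsr via stack slot
       _mm_setcsr (csr | _MM_FLUSH_ZERO_ON);  // ldmxcsr via stack slot
       return csr;
     }
*/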
39059 case IX86_BUILTIN_CLFLUSH:
39060 arg0 = CALL_EXPR_ARG (exp, 0);
39061 op0 = expand_normal (arg0);
39062 icode = CODE_FOR_sse2_clflush;
39063 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39064 op0 = ix86_zero_extend_to_Pmode (op0);
39066 emit_insn (gen_sse2_clflush (op0));
39067 return 0;
39069 case IX86_BUILTIN_CLWB:
39070 arg0 = CALL_EXPR_ARG (exp, 0);
39071 op0 = expand_normal (arg0);
39072 icode = CODE_FOR_clwb;
39073 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39074 op0 = ix86_zero_extend_to_Pmode (op0);
39076 emit_insn (gen_clwb (op0));
39077 return 0;
39079 case IX86_BUILTIN_CLFLUSHOPT:
39080 arg0 = CALL_EXPR_ARG (exp, 0);
39081 op0 = expand_normal (arg0);
39082 icode = CODE_FOR_clflushopt;
39083 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39084 op0 = ix86_zero_extend_to_Pmode (op0);
39086 emit_insn (gen_clflushopt (op0));
39087 return 0;
39089 case IX86_BUILTIN_MONITOR:
39090 case IX86_BUILTIN_MONITORX:
39091 arg0 = CALL_EXPR_ARG (exp, 0);
39092 arg1 = CALL_EXPR_ARG (exp, 1);
39093 arg2 = CALL_EXPR_ARG (exp, 2);
39094 op0 = expand_normal (arg0);
39095 op1 = expand_normal (arg1);
39096 op2 = expand_normal (arg2);
39097 if (!REG_P (op0))
39098 op0 = ix86_zero_extend_to_Pmode (op0);
39099 if (!REG_P (op1))
39100 op1 = copy_to_mode_reg (SImode, op1);
39101 if (!REG_P (op2))
39102 op2 = copy_to_mode_reg (SImode, op2);
39104 emit_insn (fcode == IX86_BUILTIN_MONITOR
39105 ? ix86_gen_monitor (op0, op1, op2)
39106 : ix86_gen_monitorx (op0, op1, op2));
39107 return 0;
39109 case IX86_BUILTIN_MWAIT:
39110 arg0 = CALL_EXPR_ARG (exp, 0);
39111 arg1 = CALL_EXPR_ARG (exp, 1);
39112 op0 = expand_normal (arg0);
39113 op1 = expand_normal (arg1);
39114 if (!REG_P (op0))
39115 op0 = copy_to_mode_reg (SImode, op0);
39116 if (!REG_P (op1))
39117 op1 = copy_to_mode_reg (SImode, op1);
39118 emit_insn (gen_sse3_mwait (op0, op1));
39119 return 0;
39121 case IX86_BUILTIN_MWAITX:
39122 arg0 = CALL_EXPR_ARG (exp, 0);
39123 arg1 = CALL_EXPR_ARG (exp, 1);
39124 arg2 = CALL_EXPR_ARG (exp, 2);
39125 op0 = expand_normal (arg0);
39126 op1 = expand_normal (arg1);
39127 op2 = expand_normal (arg2);
39128 if (!REG_P (op0))
39129 op0 = copy_to_mode_reg (SImode, op0);
39130 if (!REG_P (op1))
39131 op1 = copy_to_mode_reg (SImode, op1);
39132 if (!REG_P (op2))
39133 op2 = copy_to_mode_reg (SImode, op2);
39134 emit_insn (gen_mwaitx (op0, op1, op2));
39135 return 0;
39137 case IX86_BUILTIN_VEC_INIT_V2SI:
39138 case IX86_BUILTIN_VEC_INIT_V4HI:
39139 case IX86_BUILTIN_VEC_INIT_V8QI:
39140 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39142 case IX86_BUILTIN_VEC_EXT_V2DF:
39143 case IX86_BUILTIN_VEC_EXT_V2DI:
39144 case IX86_BUILTIN_VEC_EXT_V4SF:
39145 case IX86_BUILTIN_VEC_EXT_V4SI:
39146 case IX86_BUILTIN_VEC_EXT_V8HI:
39147 case IX86_BUILTIN_VEC_EXT_V2SI:
39148 case IX86_BUILTIN_VEC_EXT_V4HI:
39149 case IX86_BUILTIN_VEC_EXT_V16QI:
39150 return ix86_expand_vec_ext_builtin (exp, target);
39152 case IX86_BUILTIN_VEC_SET_V2DI:
39153 case IX86_BUILTIN_VEC_SET_V4SF:
39154 case IX86_BUILTIN_VEC_SET_V4SI:
39155 case IX86_BUILTIN_VEC_SET_V8HI:
39156 case IX86_BUILTIN_VEC_SET_V4HI:
39157 case IX86_BUILTIN_VEC_SET_V16QI:
39158 return ix86_expand_vec_set_builtin (exp);
39160 case IX86_BUILTIN_INFQ:
39161 case IX86_BUILTIN_HUGE_VALQ:
39163 REAL_VALUE_TYPE inf;
39164 rtx tmp;
39166 real_inf (&inf);
39167 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39169 tmp = validize_mem (force_const_mem (mode, tmp));
39171 if (target == 0)
39172 target = gen_reg_rtx (mode);
39174 emit_move_insn (target, tmp);
39175 return target;
39178 case IX86_BUILTIN_RDPMC:
39179 case IX86_BUILTIN_RDTSC:
39180 case IX86_BUILTIN_RDTSCP:
39182 op0 = gen_reg_rtx (DImode);
39183 op1 = gen_reg_rtx (DImode);
39185 if (fcode == IX86_BUILTIN_RDPMC)
39187 arg0 = CALL_EXPR_ARG (exp, 0);
39188 op2 = expand_normal (arg0);
39189 if (!register_operand (op2, SImode))
39190 op2 = copy_to_mode_reg (SImode, op2);
39192 insn = (TARGET_64BIT
39193 ? gen_rdpmc_rex64 (op0, op1, op2)
39194 : gen_rdpmc (op0, op2));
39195 emit_insn (insn);
39197 else if (fcode == IX86_BUILTIN_RDTSC)
39199 insn = (TARGET_64BIT
39200 ? gen_rdtsc_rex64 (op0, op1)
39201 : gen_rdtsc (op0));
39202 emit_insn (insn);
39204 else
39206 op2 = gen_reg_rtx (SImode);
39208 insn = (TARGET_64BIT
39209 ? gen_rdtscp_rex64 (op0, op1, op2)
39210 : gen_rdtscp (op0, op2));
39211 emit_insn (insn);
39213 arg0 = CALL_EXPR_ARG (exp, 0);
39214 op4 = expand_normal (arg0);
39215 if (!address_operand (op4, VOIDmode))
39217 op4 = convert_memory_address (Pmode, op4);
39218 op4 = copy_addr_to_reg (op4);
39220 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39223 if (target == 0)
39225 /* mode is VOIDmode if __builtin_rd* has been called
39226 without an lhs. */
39227 if (mode == VOIDmode)
39228 return target;
39229 target = gen_reg_rtx (mode);
39232 if (TARGET_64BIT)
39234 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39235 op1, 1, OPTAB_DIRECT);
39236 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39237 op0, 1, OPTAB_DIRECT);
39240 emit_move_insn (target, op0);
39241 return target;
39243 case IX86_BUILTIN_FXSAVE:
39244 case IX86_BUILTIN_FXRSTOR:
39245 case IX86_BUILTIN_FXSAVE64:
39246 case IX86_BUILTIN_FXRSTOR64:
39247 case IX86_BUILTIN_FNSTENV:
39248 case IX86_BUILTIN_FLDENV:
39249 mode0 = BLKmode;
39250 switch (fcode)
39252 case IX86_BUILTIN_FXSAVE:
39253 icode = CODE_FOR_fxsave;
39254 break;
39255 case IX86_BUILTIN_FXRSTOR:
39256 icode = CODE_FOR_fxrstor;
39257 break;
39258 case IX86_BUILTIN_FXSAVE64:
39259 icode = CODE_FOR_fxsave64;
39260 break;
39261 case IX86_BUILTIN_FXRSTOR64:
39262 icode = CODE_FOR_fxrstor64;
39263 break;
39264 case IX86_BUILTIN_FNSTENV:
39265 icode = CODE_FOR_fnstenv;
39266 break;
39267 case IX86_BUILTIN_FLDENV:
39268 icode = CODE_FOR_fldenv;
39269 break;
39270 default:
39271 gcc_unreachable ();
39274 arg0 = CALL_EXPR_ARG (exp, 0);
39275 op0 = expand_normal (arg0);
39277 if (!address_operand (op0, VOIDmode))
39279 op0 = convert_memory_address (Pmode, op0);
39280 op0 = copy_addr_to_reg (op0);
39282 op0 = gen_rtx_MEM (mode0, op0);
39284 pat = GEN_FCN (icode) (op0);
39285 if (pat)
39286 emit_insn (pat);
39287 return 0;
39289 case IX86_BUILTIN_XSAVE:
39290 case IX86_BUILTIN_XRSTOR:
39291 case IX86_BUILTIN_XSAVE64:
39292 case IX86_BUILTIN_XRSTOR64:
39293 case IX86_BUILTIN_XSAVEOPT:
39294 case IX86_BUILTIN_XSAVEOPT64:
39295 case IX86_BUILTIN_XSAVES:
39296 case IX86_BUILTIN_XRSTORS:
39297 case IX86_BUILTIN_XSAVES64:
39298 case IX86_BUILTIN_XRSTORS64:
39299 case IX86_BUILTIN_XSAVEC:
39300 case IX86_BUILTIN_XSAVEC64:
39301 arg0 = CALL_EXPR_ARG (exp, 0);
39302 arg1 = CALL_EXPR_ARG (exp, 1);
39303 op0 = expand_normal (arg0);
39304 op1 = expand_normal (arg1);
39306 if (!address_operand (op0, VOIDmode))
39308 op0 = convert_memory_address (Pmode, op0);
39309 op0 = copy_addr_to_reg (op0);
39311 op0 = gen_rtx_MEM (BLKmode, op0);
39313 op1 = force_reg (DImode, op1);
39315 if (TARGET_64BIT)
39317 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39318 NULL, 1, OPTAB_DIRECT);
39319 switch (fcode)
39321 case IX86_BUILTIN_XSAVE:
39322 icode = CODE_FOR_xsave_rex64;
39323 break;
39324 case IX86_BUILTIN_XRSTOR:
39325 icode = CODE_FOR_xrstor_rex64;
39326 break;
39327 case IX86_BUILTIN_XSAVE64:
39328 icode = CODE_FOR_xsave64;
39329 break;
39330 case IX86_BUILTIN_XRSTOR64:
39331 icode = CODE_FOR_xrstor64;
39332 break;
39333 case IX86_BUILTIN_XSAVEOPT:
39334 icode = CODE_FOR_xsaveopt_rex64;
39335 break;
39336 case IX86_BUILTIN_XSAVEOPT64:
39337 icode = CODE_FOR_xsaveopt64;
39338 break;
39339 case IX86_BUILTIN_XSAVES:
39340 icode = CODE_FOR_xsaves_rex64;
39341 break;
39342 case IX86_BUILTIN_XRSTORS:
39343 icode = CODE_FOR_xrstors_rex64;
39344 break;
39345 case IX86_BUILTIN_XSAVES64:
39346 icode = CODE_FOR_xsaves64;
39347 break;
39348 case IX86_BUILTIN_XRSTORS64:
39349 icode = CODE_FOR_xrstors64;
39350 break;
39351 case IX86_BUILTIN_XSAVEC:
39352 icode = CODE_FOR_xsavec_rex64;
39353 break;
39354 case IX86_BUILTIN_XSAVEC64:
39355 icode = CODE_FOR_xsavec64;
39356 break;
39357 default:
39358 gcc_unreachable ();
39361 op2 = gen_lowpart (SImode, op2);
39362 op1 = gen_lowpart (SImode, op1);
39363 pat = GEN_FCN (icode) (op0, op1, op2);
39365 else
39367 switch (fcode)
39369 case IX86_BUILTIN_XSAVE:
39370 icode = CODE_FOR_xsave;
39371 break;
39372 case IX86_BUILTIN_XRSTOR:
39373 icode = CODE_FOR_xrstor;
39374 break;
39375 case IX86_BUILTIN_XSAVEOPT:
39376 icode = CODE_FOR_xsaveopt;
39377 break;
39378 case IX86_BUILTIN_XSAVES:
39379 icode = CODE_FOR_xsaves;
39380 break;
39381 case IX86_BUILTIN_XRSTORS:
39382 icode = CODE_FOR_xrstors;
39383 break;
39384 case IX86_BUILTIN_XSAVEC:
39385 icode = CODE_FOR_xsavec;
39386 break;
39387 default:
39388 gcc_unreachable ();
39390 pat = GEN_FCN (icode) (op0, op1);
39393 if (pat)
39394 emit_insn (pat);
39395 return 0;
39397 case IX86_BUILTIN_LLWPCB:
39398 arg0 = CALL_EXPR_ARG (exp, 0);
39399 op0 = expand_normal (arg0);
39400 icode = CODE_FOR_lwp_llwpcb;
39401 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39402 op0 = ix86_zero_extend_to_Pmode (op0);
39403 emit_insn (gen_lwp_llwpcb (op0));
39404 return 0;
39406 case IX86_BUILTIN_SLWPCB:
39407 icode = CODE_FOR_lwp_slwpcb;
39408 if (!target
39409 || !insn_data[icode].operand[0].predicate (target, Pmode))
39410 target = gen_reg_rtx (Pmode);
39411 emit_insn (gen_lwp_slwpcb (target));
39412 return target;
39414 case IX86_BUILTIN_BEXTRI32:
39415 case IX86_BUILTIN_BEXTRI64:
39416 arg0 = CALL_EXPR_ARG (exp, 0);
39417 arg1 = CALL_EXPR_ARG (exp, 1);
39418 op0 = expand_normal (arg0);
39419 op1 = expand_normal (arg1);
39420 icode = (fcode == IX86_BUILTIN_BEXTRI32
39421 ? CODE_FOR_tbm_bextri_si
39422 : CODE_FOR_tbm_bextri_di);
39423 if (!CONST_INT_P (op1))
39425 error ("last argument must be an immediate");
39426 return const0_rtx;
39428 else
39430 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39431 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39432 op1 = GEN_INT (length);
39433 op2 = GEN_INT (lsb_index);
39434 pat = GEN_FCN (icode) (target, op0, op1, op2);
39435 if (pat)
39436 emit_insn (pat);
39437 return target;
39440 case IX86_BUILTIN_RDRAND16_STEP:
39441 icode = CODE_FOR_rdrandhi_1;
39442 mode0 = HImode;
39443 goto rdrand_step;
39445 case IX86_BUILTIN_RDRAND32_STEP:
39446 icode = CODE_FOR_rdrandsi_1;
39447 mode0 = SImode;
39448 goto rdrand_step;
39450 case IX86_BUILTIN_RDRAND64_STEP:
39451 icode = CODE_FOR_rdranddi_1;
39452 mode0 = DImode;
39454 rdrand_step:
39455 op0 = gen_reg_rtx (mode0);
39456 emit_insn (GEN_FCN (icode) (op0));
39458 arg0 = CALL_EXPR_ARG (exp, 0);
39459 op1 = expand_normal (arg0);
39460 if (!address_operand (op1, VOIDmode))
39462 op1 = convert_memory_address (Pmode, op1);
39463 op1 = copy_addr_to_reg (op1);
39465 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39467 op1 = gen_reg_rtx (SImode);
39468 emit_move_insn (op1, CONST1_RTX (SImode));
39470 /* Emit SImode conditional move. */
39471 if (mode0 == HImode)
39473 op2 = gen_reg_rtx (SImode);
39474 emit_insn (gen_zero_extendhisi2 (op2, op0));
39476 else if (mode0 == SImode)
39477 op2 = op0;
39478 else
39479 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39481 if (target == 0
39482 || !register_operand (target, SImode))
39483 target = gen_reg_rtx (SImode);
39485 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39486 const0_rtx);
39487 emit_insn (gen_rtx_SET (target,
39488 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39489 return target;
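/* Illustrative usage (not part of GCC itself): the CF-based
   conditional move above implements the *_step protocol.  A minimal
   sketch, assuming the usual immintrin.h wrapper:

     #include <immintrin.h>

     unsigned int
     get_hw_random (void)
     {
       unsigned int r;
       // Returns 1 and stores a value when the hardware had entropy
       // ready (CF set); returns 0 otherwise, so callers retry.
       while (!_rdrand32_step (&r))
         ;
       return r;
     }
*/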
39491 case IX86_BUILTIN_RDSEED16_STEP:
39492 icode = CODE_FOR_rdseedhi_1;
39493 mode0 = HImode;
39494 goto rdseed_step;
39496 case IX86_BUILTIN_RDSEED32_STEP:
39497 icode = CODE_FOR_rdseedsi_1;
39498 mode0 = SImode;
39499 goto rdseed_step;
39501 case IX86_BUILTIN_RDSEED64_STEP:
39502 icode = CODE_FOR_rdseeddi_1;
39503 mode0 = DImode;
39505 rdseed_step:
39506 op0 = gen_reg_rtx (mode0);
39507 emit_insn (GEN_FCN (icode) (op0));
39509 arg0 = CALL_EXPR_ARG (exp, 0);
39510 op1 = expand_normal (arg0);
39511 if (!address_operand (op1, VOIDmode))
39513 op1 = convert_memory_address (Pmode, op1);
39514 op1 = copy_addr_to_reg (op1);
39516 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39518 op2 = gen_reg_rtx (QImode);
39520 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39521 const0_rtx);
39522 emit_insn (gen_rtx_SET (op2, pat));
39524 if (target == 0
39525 || !register_operand (target, SImode))
39526 target = gen_reg_rtx (SImode);
39528 emit_insn (gen_zero_extendqisi2 (target, op2));
39529 return target;
39531 case IX86_BUILTIN_SBB32:
39532 icode = CODE_FOR_subborrowsi;
39533 mode0 = SImode;
39534 goto handlecarry;
39536 case IX86_BUILTIN_SBB64:
39537 icode = CODE_FOR_subborrowdi;
39538 mode0 = DImode;
39539 goto handlecarry;
39541 case IX86_BUILTIN_ADDCARRYX32:
39542 icode = CODE_FOR_addcarrysi;
39543 mode0 = SImode;
39544 goto handlecarry;
39546 case IX86_BUILTIN_ADDCARRYX64:
39547 icode = CODE_FOR_addcarrydi;
39548 mode0 = DImode;
39550 handlecarry:
39551 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39552 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39553 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39554 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39556 op1 = expand_normal (arg0);
39557 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39559 op2 = expand_normal (arg1);
39560 if (!register_operand (op2, mode0))
39561 op2 = copy_to_mode_reg (mode0, op2);
39563 op3 = expand_normal (arg2);
39564 if (!register_operand (op3, mode0))
39565 op3 = copy_to_mode_reg (mode0, op3);
39567 op4 = expand_normal (arg3);
39568 if (!address_operand (op4, VOIDmode))
39570 op4 = convert_memory_address (Pmode, op4);
39571 op4 = copy_addr_to_reg (op4);
39574 /* Generate CF from input operand. */
39575 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
39577 /* Generate instruction that consumes CF. */
39578 op0 = gen_reg_rtx (mode0);
39580 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
39581 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
39582 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
39584 /* Return current CF value. */
39585 if (target == 0)
39586 target = gen_reg_rtx (QImode);
39588 PUT_MODE (pat, QImode);
39589 emit_insn (gen_rtx_SET (target, pat));
39591 /* Store the result. */
39592 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39594 return target;
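/* Illustrative usage (not part of GCC itself): a multi-word addition
   built on the carry chaining expanded above.  A minimal sketch,
   assuming the usual adxintrin.h/immintrin.h wrappers:

     #include <immintrin.h>

     unsigned char
     add_256bit (const unsigned long long a[4],
                 const unsigned long long b[4],
                 unsigned long long out[4])
     {
       unsigned char c = 0;
       for (int i = 0; i < 4; i++)
         // Each step consumes the previous carry and produces the next.
         c = _addcarry_u64 (c, a[i], b[i], &out[i]);
       return c;
     }
*/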
39596 case IX86_BUILTIN_READ_FLAGS:
39597 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39599 if (optimize
39600 || target == NULL_RTX
39601 || !nonimmediate_operand (target, word_mode)
39602 || GET_MODE (target) != word_mode)
39603 target = gen_reg_rtx (word_mode);
39605 emit_insn (gen_pop (target));
39606 return target;
39608 case IX86_BUILTIN_WRITE_FLAGS:
39610 arg0 = CALL_EXPR_ARG (exp, 0);
39611 op0 = expand_normal (arg0);
39612 if (!general_no_elim_operand (op0, word_mode))
39613 op0 = copy_to_mode_reg (word_mode, op0);
39615 emit_insn (gen_push (op0));
39616 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39617 return 0;
39619 case IX86_BUILTIN_KORTESTC16:
39620 icode = CODE_FOR_kortestchi;
39621 mode0 = HImode;
39622 mode1 = CCCmode;
39623 goto kortest;
39625 case IX86_BUILTIN_KORTESTZ16:
39626 icode = CODE_FOR_kortestzhi;
39627 mode0 = HImode;
39628 mode1 = CCZmode;
39630 kortest:
39631 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39632 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39633 op0 = expand_normal (arg0);
39634 op1 = expand_normal (arg1);
39636 op0 = copy_to_reg (op0);
39637 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39638 op1 = copy_to_reg (op1);
39639 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39641 target = gen_reg_rtx (QImode);
39642 emit_insn (gen_rtx_SET (target, const0_rtx));
39644 /* Emit kortest. */
39645 emit_insn (GEN_FCN (icode) (op0, op1));
39646 /* And use setcc to return the result from flags. */
39647 ix86_expand_setcc (target, EQ,
39648 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39649 return target;
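/* Illustrative usage (not part of GCC itself), assuming the usual
   AVX-512 mask intrinsics:

     #include <immintrin.h>

     int
     all_lanes_set (__mmask16 m1, __mmask16 m2)
     {
       // kortestw sets CF when (m1 | m2) is all ones; the expander
       // above reads the flag back with setcc.
       return _mm512_kortestc (m1, m2);
     }
*/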
39651 case IX86_BUILTIN_GATHERSIV2DF:
39652 icode = CODE_FOR_avx2_gathersiv2df;
39653 goto gather_gen;
39654 case IX86_BUILTIN_GATHERSIV4DF:
39655 icode = CODE_FOR_avx2_gathersiv4df;
39656 goto gather_gen;
39657 case IX86_BUILTIN_GATHERDIV2DF:
39658 icode = CODE_FOR_avx2_gatherdiv2df;
39659 goto gather_gen;
39660 case IX86_BUILTIN_GATHERDIV4DF:
39661 icode = CODE_FOR_avx2_gatherdiv4df;
39662 goto gather_gen;
39663 case IX86_BUILTIN_GATHERSIV4SF:
39664 icode = CODE_FOR_avx2_gathersiv4sf;
39665 goto gather_gen;
39666 case IX86_BUILTIN_GATHERSIV8SF:
39667 icode = CODE_FOR_avx2_gathersiv8sf;
39668 goto gather_gen;
39669 case IX86_BUILTIN_GATHERDIV4SF:
39670 icode = CODE_FOR_avx2_gatherdiv4sf;
39671 goto gather_gen;
39672 case IX86_BUILTIN_GATHERDIV8SF:
39673 icode = CODE_FOR_avx2_gatherdiv8sf;
39674 goto gather_gen;
39675 case IX86_BUILTIN_GATHERSIV2DI:
39676 icode = CODE_FOR_avx2_gathersiv2di;
39677 goto gather_gen;
39678 case IX86_BUILTIN_GATHERSIV4DI:
39679 icode = CODE_FOR_avx2_gathersiv4di;
39680 goto gather_gen;
39681 case IX86_BUILTIN_GATHERDIV2DI:
39682 icode = CODE_FOR_avx2_gatherdiv2di;
39683 goto gather_gen;
39684 case IX86_BUILTIN_GATHERDIV4DI:
39685 icode = CODE_FOR_avx2_gatherdiv4di;
39686 goto gather_gen;
39687 case IX86_BUILTIN_GATHERSIV4SI:
39688 icode = CODE_FOR_avx2_gathersiv4si;
39689 goto gather_gen;
39690 case IX86_BUILTIN_GATHERSIV8SI:
39691 icode = CODE_FOR_avx2_gathersiv8si;
39692 goto gather_gen;
39693 case IX86_BUILTIN_GATHERDIV4SI:
39694 icode = CODE_FOR_avx2_gatherdiv4si;
39695 goto gather_gen;
39696 case IX86_BUILTIN_GATHERDIV8SI:
39697 icode = CODE_FOR_avx2_gatherdiv8si;
39698 goto gather_gen;
39699 case IX86_BUILTIN_GATHERALTSIV4DF:
39700 icode = CODE_FOR_avx2_gathersiv4df;
39701 goto gather_gen;
39702 case IX86_BUILTIN_GATHERALTDIV8SF:
39703 icode = CODE_FOR_avx2_gatherdiv8sf;
39704 goto gather_gen;
39705 case IX86_BUILTIN_GATHERALTSIV4DI:
39706 icode = CODE_FOR_avx2_gathersiv4di;
39707 goto gather_gen;
39708 case IX86_BUILTIN_GATHERALTDIV8SI:
39709 icode = CODE_FOR_avx2_gatherdiv8si;
39710 goto gather_gen;
39711 case IX86_BUILTIN_GATHER3SIV16SF:
39712 icode = CODE_FOR_avx512f_gathersiv16sf;
39713 goto gather_gen;
39714 case IX86_BUILTIN_GATHER3SIV8DF:
39715 icode = CODE_FOR_avx512f_gathersiv8df;
39716 goto gather_gen;
39717 case IX86_BUILTIN_GATHER3DIV16SF:
39718 icode = CODE_FOR_avx512f_gatherdiv16sf;
39719 goto gather_gen;
39720 case IX86_BUILTIN_GATHER3DIV8DF:
39721 icode = CODE_FOR_avx512f_gatherdiv8df;
39722 goto gather_gen;
39723 case IX86_BUILTIN_GATHER3SIV16SI:
39724 icode = CODE_FOR_avx512f_gathersiv16si;
39725 goto gather_gen;
39726 case IX86_BUILTIN_GATHER3SIV8DI:
39727 icode = CODE_FOR_avx512f_gathersiv8di;
39728 goto gather_gen;
39729 case IX86_BUILTIN_GATHER3DIV16SI:
39730 icode = CODE_FOR_avx512f_gatherdiv16si;
39731 goto gather_gen;
39732 case IX86_BUILTIN_GATHER3DIV8DI:
39733 icode = CODE_FOR_avx512f_gatherdiv8di;
39734 goto gather_gen;
39735 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39736 icode = CODE_FOR_avx512f_gathersiv8df;
39737 goto gather_gen;
39738 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39739 icode = CODE_FOR_avx512f_gatherdiv16sf;
39740 goto gather_gen;
39741 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39742 icode = CODE_FOR_avx512f_gathersiv8di;
39743 goto gather_gen;
39744 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39745 icode = CODE_FOR_avx512f_gatherdiv16si;
39746 goto gather_gen;
39747 case IX86_BUILTIN_GATHER3SIV2DF:
39748 icode = CODE_FOR_avx512vl_gathersiv2df;
39749 goto gather_gen;
39750 case IX86_BUILTIN_GATHER3SIV4DF:
39751 icode = CODE_FOR_avx512vl_gathersiv4df;
39752 goto gather_gen;
39753 case IX86_BUILTIN_GATHER3DIV2DF:
39754 icode = CODE_FOR_avx512vl_gatherdiv2df;
39755 goto gather_gen;
39756 case IX86_BUILTIN_GATHER3DIV4DF:
39757 icode = CODE_FOR_avx512vl_gatherdiv4df;
39758 goto gather_gen;
39759 case IX86_BUILTIN_GATHER3SIV4SF:
39760 icode = CODE_FOR_avx512vl_gathersiv4sf;
39761 goto gather_gen;
39762 case IX86_BUILTIN_GATHER3SIV8SF:
39763 icode = CODE_FOR_avx512vl_gathersiv8sf;
39764 goto gather_gen;
39765 case IX86_BUILTIN_GATHER3DIV4SF:
39766 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39767 goto gather_gen;
39768 case IX86_BUILTIN_GATHER3DIV8SF:
39769 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39770 goto gather_gen;
39771 case IX86_BUILTIN_GATHER3SIV2DI:
39772 icode = CODE_FOR_avx512vl_gathersiv2di;
39773 goto gather_gen;
39774 case IX86_BUILTIN_GATHER3SIV4DI:
39775 icode = CODE_FOR_avx512vl_gathersiv4di;
39776 goto gather_gen;
39777 case IX86_BUILTIN_GATHER3DIV2DI:
39778 icode = CODE_FOR_avx512vl_gatherdiv2di;
39779 goto gather_gen;
39780 case IX86_BUILTIN_GATHER3DIV4DI:
39781 icode = CODE_FOR_avx512vl_gatherdiv4di;
39782 goto gather_gen;
39783 case IX86_BUILTIN_GATHER3SIV4SI:
39784 icode = CODE_FOR_avx512vl_gathersiv4si;
39785 goto gather_gen;
39786 case IX86_BUILTIN_GATHER3SIV8SI:
39787 icode = CODE_FOR_avx512vl_gathersiv8si;
39788 goto gather_gen;
39789 case IX86_BUILTIN_GATHER3DIV4SI:
39790 icode = CODE_FOR_avx512vl_gatherdiv4si;
39791 goto gather_gen;
39792 case IX86_BUILTIN_GATHER3DIV8SI:
39793 icode = CODE_FOR_avx512vl_gatherdiv8si;
39794 goto gather_gen;
39795 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39796 icode = CODE_FOR_avx512vl_gathersiv4df;
39797 goto gather_gen;
39798 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39799 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39800 goto gather_gen;
39801 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39802 icode = CODE_FOR_avx512vl_gathersiv4di;
39803 goto gather_gen;
39804 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39805 icode = CODE_FOR_avx512vl_gatherdiv8si;
39806 goto gather_gen;
39807 case IX86_BUILTIN_SCATTERSIV16SF:
39808 icode = CODE_FOR_avx512f_scattersiv16sf;
39809 goto scatter_gen;
39810 case IX86_BUILTIN_SCATTERSIV8DF:
39811 icode = CODE_FOR_avx512f_scattersiv8df;
39812 goto scatter_gen;
39813 case IX86_BUILTIN_SCATTERDIV16SF:
39814 icode = CODE_FOR_avx512f_scatterdiv16sf;
39815 goto scatter_gen;
39816 case IX86_BUILTIN_SCATTERDIV8DF:
39817 icode = CODE_FOR_avx512f_scatterdiv8df;
39818 goto scatter_gen;
39819 case IX86_BUILTIN_SCATTERSIV16SI:
39820 icode = CODE_FOR_avx512f_scattersiv16si;
39821 goto scatter_gen;
39822 case IX86_BUILTIN_SCATTERSIV8DI:
39823 icode = CODE_FOR_avx512f_scattersiv8di;
39824 goto scatter_gen;
39825 case IX86_BUILTIN_SCATTERDIV16SI:
39826 icode = CODE_FOR_avx512f_scatterdiv16si;
39827 goto scatter_gen;
39828 case IX86_BUILTIN_SCATTERDIV8DI:
39829 icode = CODE_FOR_avx512f_scatterdiv8di;
39830 goto scatter_gen;
39831 case IX86_BUILTIN_SCATTERSIV8SF:
39832 icode = CODE_FOR_avx512vl_scattersiv8sf;
39833 goto scatter_gen;
39834 case IX86_BUILTIN_SCATTERSIV4SF:
39835 icode = CODE_FOR_avx512vl_scattersiv4sf;
39836 goto scatter_gen;
39837 case IX86_BUILTIN_SCATTERSIV4DF:
39838 icode = CODE_FOR_avx512vl_scattersiv4df;
39839 goto scatter_gen;
39840 case IX86_BUILTIN_SCATTERSIV2DF:
39841 icode = CODE_FOR_avx512vl_scattersiv2df;
39842 goto scatter_gen;
39843 case IX86_BUILTIN_SCATTERDIV8SF:
39844 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39845 goto scatter_gen;
39846 case IX86_BUILTIN_SCATTERDIV4SF:
39847 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39848 goto scatter_gen;
39849 case IX86_BUILTIN_SCATTERDIV4DF:
39850 icode = CODE_FOR_avx512vl_scatterdiv4df;
39851 goto scatter_gen;
39852 case IX86_BUILTIN_SCATTERDIV2DF:
39853 icode = CODE_FOR_avx512vl_scatterdiv2df;
39854 goto scatter_gen;
39855 case IX86_BUILTIN_SCATTERSIV8SI:
39856 icode = CODE_FOR_avx512vl_scattersiv8si;
39857 goto scatter_gen;
39858 case IX86_BUILTIN_SCATTERSIV4SI:
39859 icode = CODE_FOR_avx512vl_scattersiv4si;
39860 goto scatter_gen;
39861 case IX86_BUILTIN_SCATTERSIV4DI:
39862 icode = CODE_FOR_avx512vl_scattersiv4di;
39863 goto scatter_gen;
39864 case IX86_BUILTIN_SCATTERSIV2DI:
39865 icode = CODE_FOR_avx512vl_scattersiv2di;
39866 goto scatter_gen;
39867 case IX86_BUILTIN_SCATTERDIV8SI:
39868 icode = CODE_FOR_avx512vl_scatterdiv8si;
39869 goto scatter_gen;
39870 case IX86_BUILTIN_SCATTERDIV4SI:
39871 icode = CODE_FOR_avx512vl_scatterdiv4si;
39872 goto scatter_gen;
39873 case IX86_BUILTIN_SCATTERDIV4DI:
39874 icode = CODE_FOR_avx512vl_scatterdiv4di;
39875 goto scatter_gen;
39876 case IX86_BUILTIN_SCATTERDIV2DI:
39877 icode = CODE_FOR_avx512vl_scatterdiv2di;
39878 goto scatter_gen;
39879 case IX86_BUILTIN_GATHERPFDPD:
39880 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39881 goto vec_prefetch_gen;
39882 case IX86_BUILTIN_SCATTERALTSIV8DF:
39883 icode = CODE_FOR_avx512f_scattersiv8df;
39884 goto scatter_gen;
39885 case IX86_BUILTIN_SCATTERALTDIV16SF:
39886 icode = CODE_FOR_avx512f_scatterdiv16sf;
39887 goto scatter_gen;
39888 case IX86_BUILTIN_SCATTERALTSIV8DI:
39889 icode = CODE_FOR_avx512f_scattersiv8di;
39890 goto scatter_gen;
39891 case IX86_BUILTIN_SCATTERALTDIV16SI:
39892 icode = CODE_FOR_avx512f_scatterdiv16si;
39893 goto scatter_gen;
39894 case IX86_BUILTIN_GATHERPFDPS:
39895 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39896 goto vec_prefetch_gen;
39897 case IX86_BUILTIN_GATHERPFQPD:
39898 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39899 goto vec_prefetch_gen;
39900 case IX86_BUILTIN_GATHERPFQPS:
39901 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39902 goto vec_prefetch_gen;
39903 case IX86_BUILTIN_SCATTERPFDPD:
39904 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39905 goto vec_prefetch_gen;
39906 case IX86_BUILTIN_SCATTERPFDPS:
39907 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39908 goto vec_prefetch_gen;
39909 case IX86_BUILTIN_SCATTERPFQPD:
39910 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39911 goto vec_prefetch_gen;
39912 case IX86_BUILTIN_SCATTERPFQPS:
39913 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39914 goto vec_prefetch_gen;
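/* Illustrative usage (not part of GCC itself): every gather case
   funnels into gather_gen below.  A typical source-level form, as a
   sketch assuming the usual avx2intrin.h wrapper:

     #include <immintrin.h>

     __m256d
     gather_four (const double *base, __m128i idx)
     {
       // Loads base[idx[i]] for i = 0..3 with a scale of 8.  The
       // wrapper passes an all-ones mask built from a self-compare,
       // which the optimization below recognizes and turns into
       // pc_rtx so the old destination is known to be dead.
       return _mm256_i32gather_pd (base, idx, 8);
     }
*/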
39916 gather_gen:
39917 rtx half;
39918 rtx (*gen) (rtx, rtx);
39920 arg0 = CALL_EXPR_ARG (exp, 0);
39921 arg1 = CALL_EXPR_ARG (exp, 1);
39922 arg2 = CALL_EXPR_ARG (exp, 2);
39923 arg3 = CALL_EXPR_ARG (exp, 3);
39924 arg4 = CALL_EXPR_ARG (exp, 4);
39925 op0 = expand_normal (arg0);
39926 op1 = expand_normal (arg1);
39927 op2 = expand_normal (arg2);
39928 op3 = expand_normal (arg3);
39929 op4 = expand_normal (arg4);
39930 /* Note the arg order is different from the operand order. */
39931 mode0 = insn_data[icode].operand[1].mode;
39932 mode2 = insn_data[icode].operand[3].mode;
39933 mode3 = insn_data[icode].operand[4].mode;
39934 mode4 = insn_data[icode].operand[5].mode;
39936 if (target == NULL_RTX
39937 || GET_MODE (target) != insn_data[icode].operand[0].mode
39938 || !insn_data[icode].operand[0].predicate (target,
39939 GET_MODE (target)))
39940 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39941 else
39942 subtarget = target;
39944 switch (fcode)
39946 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39947 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39948 half = gen_reg_rtx (V8SImode);
39949 if (!nonimmediate_operand (op2, V16SImode))
39950 op2 = copy_to_mode_reg (V16SImode, op2);
39951 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39952 op2 = half;
39953 break;
39954 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39955 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39956 case IX86_BUILTIN_GATHERALTSIV4DF:
39957 case IX86_BUILTIN_GATHERALTSIV4DI:
39958 half = gen_reg_rtx (V4SImode);
39959 if (!nonimmediate_operand (op2, V8SImode))
39960 op2 = copy_to_mode_reg (V8SImode, op2);
39961 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39962 op2 = half;
39963 break;
39964 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39965 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39966 half = gen_reg_rtx (mode0);
39967 if (mode0 == V8SFmode)
39968 gen = gen_vec_extract_lo_v16sf;
39969 else
39970 gen = gen_vec_extract_lo_v16si;
39971 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39972 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39973 emit_insn (gen (half, op0));
39974 op0 = half;
39975 if (GET_MODE (op3) != VOIDmode)
39977 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39978 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39979 emit_insn (gen (half, op3));
39980 op3 = half;
39982 break;
39983 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39984 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39985 case IX86_BUILTIN_GATHERALTDIV8SF:
39986 case IX86_BUILTIN_GATHERALTDIV8SI:
39987 half = gen_reg_rtx (mode0);
39988 if (mode0 == V4SFmode)
39989 gen = gen_vec_extract_lo_v8sf;
39990 else
39991 gen = gen_vec_extract_lo_v8si;
39992 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39993 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39994 emit_insn (gen (half, op0));
39995 op0 = half;
39996 if (GET_MODE (op3) != VOIDmode)
39998 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39999 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40000 emit_insn (gen (half, op3));
40001 op3 = half;
40003 break;
40004 default:
40005 break;
40008 /* Force the memory address operand into a base register here; we
40009 don't want to do this to the memory operands of other builtin
40010 functions. */
40011 op1 = ix86_zero_extend_to_Pmode (op1);
40013 if (!insn_data[icode].operand[1].predicate (op0, mode0))
40014 op0 = copy_to_mode_reg (mode0, op0);
40015 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
40016 op1 = copy_to_mode_reg (Pmode, op1);
40017 if (!insn_data[icode].operand[3].predicate (op2, mode2))
40018 op2 = copy_to_mode_reg (mode2, op2);
40020 op3 = fixup_modeless_constant (op3, mode3);
40022 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
40024 if (!insn_data[icode].operand[4].predicate (op3, mode3))
40025 op3 = copy_to_mode_reg (mode3, op3);
40027 else
40029 op3 = copy_to_reg (op3);
40030 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
40032 if (!insn_data[icode].operand[5].predicate (op4, mode4))
40034 error ("the last argument must be scale 1, 2, 4, 8");
40035 return const0_rtx;
40038 /* Optimize. If mask is known to have all high bits set,
40039 replace op0 with pc_rtx to signal that the instruction
40040 overwrites the whole destination and doesn't use its
40041 previous contents. */
40042 if (optimize)
40044 if (TREE_CODE (arg3) == INTEGER_CST)
40046 if (integer_all_onesp (arg3))
40047 op0 = pc_rtx;
40049 else if (TREE_CODE (arg3) == VECTOR_CST)
40051 unsigned int negative = 0;
40052 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
40054 tree cst = VECTOR_CST_ELT (arg3, i);
40055 if (TREE_CODE (cst) == INTEGER_CST
40056 && tree_int_cst_sign_bit (cst))
40057 negative++;
40058 else if (TREE_CODE (cst) == REAL_CST
40059 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
40060 negative++;
40062 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
40063 op0 = pc_rtx;
40065 else if (TREE_CODE (arg3) == SSA_NAME
40066 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
40068 /* Recognize also when mask is like:
40069 __v2df src = _mm_setzero_pd ();
40070 __v2df mask = _mm_cmpeq_pd (src, src);
40071 or
40072 __v8sf src = _mm256_setzero_ps ();
40073 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
40074 as that is a cheaper way to load all ones into
40075 a register than having to load a constant from
40076 memory. */
40077 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
40078 if (is_gimple_call (def_stmt))
40080 tree fndecl = gimple_call_fndecl (def_stmt);
40081 if (fndecl
40082 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
40083 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
40085 case IX86_BUILTIN_CMPPD:
40086 case IX86_BUILTIN_CMPPS:
40087 case IX86_BUILTIN_CMPPD256:
40088 case IX86_BUILTIN_CMPPS256:
40089 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
40090 break;
40091 /* FALLTHRU */
40092 case IX86_BUILTIN_CMPEQPD:
40093 case IX86_BUILTIN_CMPEQPS:
40094 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
40095 && initializer_zerop (gimple_call_arg (def_stmt,
40096 1)))
40097 op0 = pc_rtx;
40098 break;
40099 default:
40100 break;
40106 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
40107 if (! pat)
40108 return const0_rtx;
40109 emit_insn (pat);
40111 switch (fcode)
40113 case IX86_BUILTIN_GATHER3DIV16SF:
40114 if (target == NULL_RTX)
40115 target = gen_reg_rtx (V8SFmode);
40116 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
40117 break;
40118 case IX86_BUILTIN_GATHER3DIV16SI:
40119 if (target == NULL_RTX)
40120 target = gen_reg_rtx (V8SImode);
40121 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
40122 break;
40123 case IX86_BUILTIN_GATHER3DIV8SF:
40124 case IX86_BUILTIN_GATHERDIV8SF:
40125 if (target == NULL_RTX)
40126 target = gen_reg_rtx (V4SFmode);
40127 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40128 break;
40129 case IX86_BUILTIN_GATHER3DIV8SI:
40130 case IX86_BUILTIN_GATHERDIV8SI:
40131 if (target == NULL_RTX)
40132 target = gen_reg_rtx (V4SImode);
40133 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40134 break;
40135 default:
40136 target = subtarget;
40137 break;
40139 return target;
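/* Illustrative sketch of what scatter_gen below expands, stated at the
   intrinsic level (assuming the usual AVX-512 intrinsic names): a call
   such as
   _mm512_mask_i32scatter_pd (base, k, idx, src, scale)
   arrives with arg0 = base, arg1 = writemask k, arg2 = index vector idx,
   arg3 = source vector src and arg4 = scale, which is the argument
   order unpacked right after the label. */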
40141 scatter_gen:
40142 arg0 = CALL_EXPR_ARG (exp, 0);
40143 arg1 = CALL_EXPR_ARG (exp, 1);
40144 arg2 = CALL_EXPR_ARG (exp, 2);
40145 arg3 = CALL_EXPR_ARG (exp, 3);
40146 arg4 = CALL_EXPR_ARG (exp, 4);
40147 op0 = expand_normal (arg0);
40148 op1 = expand_normal (arg1);
40149 op2 = expand_normal (arg2);
40150 op3 = expand_normal (arg3);
40151 op4 = expand_normal (arg4);
40152 mode1 = insn_data[icode].operand[1].mode;
40153 mode2 = insn_data[icode].operand[2].mode;
40154 mode3 = insn_data[icode].operand[3].mode;
40155 mode4 = insn_data[icode].operand[4].mode;
40157 /* A scatter instruction stores operand op3 to memory with
40158 indices from op2 and scale from op4 under writemask op1.
40159 If index operand op2 has more elements than source operand
40160 op3, only its low half is used, and vice versa. */
40161 switch (fcode)
40163 case IX86_BUILTIN_SCATTERALTSIV8DF:
40164 case IX86_BUILTIN_SCATTERALTSIV8DI:
40165 half = gen_reg_rtx (V8SImode);
40166 if (!nonimmediate_operand (op2, V16SImode))
40167 op2 = copy_to_mode_reg (V16SImode, op2);
40168 emit_insn (gen_vec_extract_lo_v16si (half, op2));
40169 op2 = half;
40170 break;
40171 case IX86_BUILTIN_SCATTERALTDIV16SF:
40172 case IX86_BUILTIN_SCATTERALTDIV16SI:
40173 half = gen_reg_rtx (mode3);
40174 if (mode3 == V8SFmode)
40175 gen = gen_vec_extract_lo_v16sf;
40176 else
40177 gen = gen_vec_extract_lo_v16si;
40178 if (!nonimmediate_operand (op3, GET_MODE (op3)))
40179 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40180 emit_insn (gen (half, op3));
40181 op3 = half;
40182 break;
40183 default:
40184 break;
40187 /* Force the memory address operand into a base register here; we
40188 don't want to do this to the memory operands of other builtin
40189 functions. */
40190 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40192 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40193 op0 = copy_to_mode_reg (Pmode, op0);
40195 op1 = fixup_modeless_constant (op1, mode1);
40197 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40199 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40200 op1 = copy_to_mode_reg (mode1, op1);
40202 else
40204 op1 = copy_to_reg (op1);
40205 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40208 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40209 op2 = copy_to_mode_reg (mode2, op2);
40211 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40212 op3 = copy_to_mode_reg (mode3, op3);
40214 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40216 error ("the last argument must be scale 1, 2, 4, 8");
40217 return const0_rtx;
40220 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40221 if (! pat)
40222 return const0_rtx;
40224 emit_insn (pat);
40225 return 0;
40227 vec_prefetch_gen:
40228 arg0 = CALL_EXPR_ARG (exp, 0);
40229 arg1 = CALL_EXPR_ARG (exp, 1);
40230 arg2 = CALL_EXPR_ARG (exp, 2);
40231 arg3 = CALL_EXPR_ARG (exp, 3);
40232 arg4 = CALL_EXPR_ARG (exp, 4);
40233 op0 = expand_normal (arg0);
40234 op1 = expand_normal (arg1);
40235 op2 = expand_normal (arg2);
40236 op3 = expand_normal (arg3);
40237 op4 = expand_normal (arg4);
40238 mode0 = insn_data[icode].operand[0].mode;
40239 mode1 = insn_data[icode].operand[1].mode;
40240 mode3 = insn_data[icode].operand[3].mode;
40241 mode4 = insn_data[icode].operand[4].mode;
40243 op0 = fixup_modeless_constant (op0, mode0);
40245 if (GET_MODE (op0) == mode0
40246 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40248 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40249 op0 = copy_to_mode_reg (mode0, op0);
40251 else if (op0 != constm1_rtx)
40253 op0 = copy_to_reg (op0);
40254 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40257 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40258 op1 = copy_to_mode_reg (mode1, op1);
40260 /* Force the memory address operand into a base register here; we
40261 don't want to do this to the memory operands of other builtin
40262 functions. */
40263 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40265 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40266 op2 = copy_to_mode_reg (Pmode, op2);
40268 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40270 error ("the forth argument must be scale 1, 2, 4, 8");
40271 return const0_rtx;
40274 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40276 error ("incorrect hint operand");
40277 return const0_rtx;
40280 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40281 if (! pat)
40282 return const0_rtx;
40284 emit_insn (pat);
40286 return 0;
40288 case IX86_BUILTIN_XABORT:
40289 icode = CODE_FOR_xabort;
40290 arg0 = CALL_EXPR_ARG (exp, 0);
40291 op0 = expand_normal (arg0);
40292 mode0 = insn_data[icode].operand[0].mode;
40293 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40295 error ("the xabort's argument must be an 8-bit immediate");
40296 return const0_rtx;
40298 emit_insn (gen_xabort (op0));
40299 return 0;
40301 default:
40302 break;
40305 for (i = 0, d = bdesc_special_args;
40306 i < ARRAY_SIZE (bdesc_special_args);
40307 i++, d++)
40308 if (d->code == fcode)
40309 return ix86_expand_special_args_builtin (d, exp, target);
40311 for (i = 0, d = bdesc_args;
40312 i < ARRAY_SIZE (bdesc_args);
40313 i++, d++)
40314 if (d->code == fcode)
40315 switch (fcode)
40317 case IX86_BUILTIN_FABSQ:
40318 case IX86_BUILTIN_COPYSIGNQ:
40319 if (!TARGET_SSE)
40320 /* Emit a normal call if SSE isn't available. */
40321 return expand_call (exp, target, ignore);
40322 default:
40323 return ix86_expand_args_builtin (d, exp, target);
40326 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40327 if (d->code == fcode)
40328 return ix86_expand_sse_comi (d, exp, target);
40330 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40331 if (d->code == fcode)
40332 return ix86_expand_round_builtin (d, exp, target);
40334 for (i = 0, d = bdesc_pcmpestr;
40335 i < ARRAY_SIZE (bdesc_pcmpestr);
40336 i++, d++)
40337 if (d->code == fcode)
40338 return ix86_expand_sse_pcmpestr (d, exp, target);
40340 for (i = 0, d = bdesc_pcmpistr;
40341 i < ARRAY_SIZE (bdesc_pcmpistr);
40342 i++, d++)
40343 if (d->code == fcode)
40344 return ix86_expand_sse_pcmpistr (d, exp, target);
40346 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40347 if (d->code == fcode)
40348 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40349 (enum ix86_builtin_func_type)
40350 d->flag, d->comparison);
40352 gcc_unreachable ();
40355 /* This returns the target-specific builtin with code CODE if
40356 current_function_decl has visibility on this builtin, which is checked
40357 using isa flags. Returns NULL_TREE otherwise. */
40359 static tree ix86_get_builtin (enum ix86_builtins code)
40361 struct cl_target_option *opts;
40362 tree target_tree = NULL_TREE;
40364 /* Determine the isa flags of current_function_decl. */
40366 if (current_function_decl)
40367 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40369 if (target_tree == NULL)
40370 target_tree = target_option_default_node;
40372 opts = TREE_TARGET_OPTION (target_tree);
40374 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40375 return ix86_builtin_decl (code, true);
40376 else
40377 return NULL_TREE;
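/* Illustrative example: if the current function is compiled with AVX2
   but without AVX-512F in its ISA flags, requesting an AVX2 gather
   code such as IX86_BUILTIN_GATHERSIV4DF returns its decl, while
   requesting an AVX-512F-only code returns NULL_TREE. */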
40380 /* Return the function decl for the target-specific builtin
40381 corresponding to the MPX builtin passed in FCODE. */
40382 static tree
40383 ix86_builtin_mpx_function (unsigned fcode)
40385 switch (fcode)
40387 case BUILT_IN_CHKP_BNDMK:
40388 return ix86_builtins[IX86_BUILTIN_BNDMK];
40390 case BUILT_IN_CHKP_BNDSTX:
40391 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40393 case BUILT_IN_CHKP_BNDLDX:
40394 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40396 case BUILT_IN_CHKP_BNDCL:
40397 return ix86_builtins[IX86_BUILTIN_BNDCL];
40399 case BUILT_IN_CHKP_BNDCU:
40400 return ix86_builtins[IX86_BUILTIN_BNDCU];
40402 case BUILT_IN_CHKP_BNDRET:
40403 return ix86_builtins[IX86_BUILTIN_BNDRET];
40405 case BUILT_IN_CHKP_INTERSECT:
40406 return ix86_builtins[IX86_BUILTIN_BNDINT];
40408 case BUILT_IN_CHKP_NARROW:
40409 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40411 case BUILT_IN_CHKP_SIZEOF:
40412 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40414 case BUILT_IN_CHKP_EXTRACT_LOWER:
40415 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40417 case BUILT_IN_CHKP_EXTRACT_UPPER:
40418 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40420 default:
40421 return NULL_TREE;
40424 gcc_unreachable ();
40427 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40429 Return an address to be used to load/store bounds for pointer
40430 passed in SLOT.
40432 SLOT_NO is an integer constant holding the number of a target-
40433 dependent special slot to be used in case SLOT is not a memory.
40435 SPECIAL_BASE is a pointer to be used as the base of fake addresses
40436 to access special slots in the Bounds Table. SPECIAL_BASE[-1],
40437 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40439 static rtx
40440 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40442 rtx addr = NULL;
40444 /* A NULL slot means we pass bounds for a pointer not passed to
40445 the function at all. A register slot means we pass the pointer
40446 in a register. In both cases bounds are passed via the Bounds
40447 Table. Since we do not have the actual pointer stored in memory,
40448 we have to use fake addresses to access the Bounds Table. We
40449 start with (special_base - sizeof (void *)) and decrease this
40450 address by the pointer size to get addresses for other slots. */
40451 if (!slot || REG_P (slot))
40453 gcc_assert (CONST_INT_P (slot_no));
40454 addr = plus_constant (Pmode, special_base,
40455 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40457 /* If the pointer is passed in memory, then its address is used to
40458 access the Bounds Table. */
40459 else if (MEM_P (slot))
40461 addr = XEXP (slot, 0);
40462 if (!register_operand (addr, Pmode))
40463 addr = copy_addr_to_reg (addr);
40465 else
40466 gcc_unreachable ();
40468 return addr;
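/* For example, with 64-bit pointers a register-passed pointer in
   special slot 0 is given the fake address SPECIAL_BASE - 8, slot 1
   is given SPECIAL_BASE - 16, and so on. */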
40471 /* Expand pass uses this hook to load bounds for function parameter
40472 PTR passed in SLOT in case its bounds are not passed in a register.
40474 If SLOT is a memory, then bounds are loaded as for a regular
40475 pointer loaded from memory. PTR may be NULL in case SLOT is a
40476 memory; in that case the value of PTR (if required) may be loaded from SLOT.
40478 If SLOT is NULL or a register then SLOT_NO is an integer constant
40479 holding the number of the target-dependent special slot which
40480 should be used to obtain bounds.
40482 Return loaded bounds. */
40484 static rtx
40485 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40487 rtx reg = gen_reg_rtx (BNDmode);
40488 rtx addr;
40490 /* Get address to be used to access Bounds Table. Special slots start
40491 at the location of return address of the current function. */
40492 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40494 /* Load the pointer value from memory if we don't have it. */
40495 if (!ptr)
40497 gcc_assert (MEM_P (slot));
40498 ptr = copy_addr_to_reg (slot);
40501 if (!register_operand (ptr, Pmode))
40502 ptr = ix86_zero_extend_to_Pmode (ptr);
40504 emit_insn (BNDmode == BND64mode
40505 ? gen_bnd64_ldx (reg, addr, ptr)
40506 : gen_bnd32_ldx (reg, addr, ptr));
40508 return reg;
40511 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40512 passed in SLOT in case BOUNDS are not passed in a register.
40514 If SLOT is a memory, then BOUNDS are stored as for a regular
40515 pointer stored in memory. PTR may be NULL in case SLOT is a
40516 memory; in that case the value of PTR (if required) may be loaded from SLOT.
40518 If SLOT is NULL or a register then SLOT_NO is an integer constant
40519 holding the number of the target-dependent special slot which
40520 should be used to store BOUNDS. */
40522 static void
40523 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40525 rtx addr;
40527 /* Get address to be used to access Bounds Table. Special slots start
40528 at the location of return address of a called function. */
40529 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40531 /* Load the pointer value from memory if we don't have it. */
40532 if (!ptr)
40534 gcc_assert (MEM_P (slot));
40535 ptr = copy_addr_to_reg (slot);
40538 if (!register_operand (ptr, Pmode))
40539 ptr = ix86_zero_extend_to_Pmode (ptr);
40541 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40542 if (!register_operand (bounds, BNDmode))
40543 bounds = copy_to_mode_reg (BNDmode, bounds);
40545 emit_insn (BNDmode == BND64mode
40546 ? gen_bnd64_stx (addr, ptr, bounds)
40547 : gen_bnd32_stx (addr, ptr, bounds));
40550 /* Load and return bounds returned by function in SLOT. */
40552 static rtx
40553 ix86_load_returned_bounds (rtx slot)
40555 rtx res;
40557 gcc_assert (REG_P (slot));
40558 res = gen_reg_rtx (BNDmode);
40559 emit_move_insn (res, slot);
40561 return res;
40564 /* Store BOUNDS returned by function into SLOT. */
40566 static void
40567 ix86_store_returned_bounds (rtx slot, rtx bounds)
40569 gcc_assert (REG_P (slot));
40570 emit_move_insn (slot, bounds);
40573 /* Returns a function decl for a vectorized version of the builtin function
40574 with builtin function code FN and result vector type TYPE_OUT, or NULL_TREE
40575 if it is not available. */
40577 static tree
40578 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40579 tree type_in)
40581 machine_mode in_mode, out_mode;
40582 int in_n, out_n;
40583 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40585 if (TREE_CODE (type_out) != VECTOR_TYPE
40586 || TREE_CODE (type_in) != VECTOR_TYPE
40587 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40588 return NULL_TREE;
40590 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40591 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40592 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40593 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40595 switch (fn)
40597 case BUILT_IN_SQRT:
40598 if (out_mode == DFmode && in_mode == DFmode)
40600 if (out_n == 2 && in_n == 2)
40601 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40602 else if (out_n == 4 && in_n == 4)
40603 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40604 else if (out_n == 8 && in_n == 8)
40605 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40607 break;
40609 case BUILT_IN_EXP2F:
40610 if (out_mode == SFmode && in_mode == SFmode)
40612 if (out_n == 16 && in_n == 16)
40613 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40615 break;
40617 case BUILT_IN_SQRTF:
40618 if (out_mode == SFmode && in_mode == SFmode)
40620 if (out_n == 4 && in_n == 4)
40621 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40622 else if (out_n == 8 && in_n == 8)
40623 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40624 else if (out_n == 16 && in_n == 16)
40625 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40627 break;
40629 case BUILT_IN_IFLOOR:
40630 case BUILT_IN_LFLOOR:
40631 case BUILT_IN_LLFLOOR:
40632 /* The round insn does not trap on denormals. */
40633 if (flag_trapping_math || !TARGET_ROUND)
40634 break;
40636 if (out_mode == SImode && in_mode == DFmode)
40638 if (out_n == 4 && in_n == 2)
40639 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40640 else if (out_n == 8 && in_n == 4)
40641 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40642 else if (out_n == 16 && in_n == 8)
40643 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40645 break;
40647 case BUILT_IN_IFLOORF:
40648 case BUILT_IN_LFLOORF:
40649 case BUILT_IN_LLFLOORF:
40650 /* The round insn does not trap on denormals. */
40651 if (flag_trapping_math || !TARGET_ROUND)
40652 break;
40654 if (out_mode == SImode && in_mode == SFmode)
40656 if (out_n == 4 && in_n == 4)
40657 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40658 else if (out_n == 8 && in_n == 8)
40659 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40661 break;
40663 case BUILT_IN_ICEIL:
40664 case BUILT_IN_LCEIL:
40665 case BUILT_IN_LLCEIL:
40666 /* The round insn does not trap on denormals. */
40667 if (flag_trapping_math || !TARGET_ROUND)
40668 break;
40670 if (out_mode == SImode && in_mode == DFmode)
40672 if (out_n == 4 && in_n == 2)
40673 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40674 else if (out_n == 8 && in_n == 4)
40675 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40676 else if (out_n == 16 && in_n == 8)
40677 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40679 break;
40681 case BUILT_IN_ICEILF:
40682 case BUILT_IN_LCEILF:
40683 case BUILT_IN_LLCEILF:
40684 /* The round insn does not trap on denormals. */
40685 if (flag_trapping_math || !TARGET_ROUND)
40686 break;
40688 if (out_mode == SImode && in_mode == SFmode)
40690 if (out_n == 4 && in_n == 4)
40691 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40692 else if (out_n == 8 && in_n == 8)
40693 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40695 break;
40697 case BUILT_IN_IRINT:
40698 case BUILT_IN_LRINT:
40699 case BUILT_IN_LLRINT:
40700 if (out_mode == SImode && in_mode == DFmode)
40702 if (out_n == 4 && in_n == 2)
40703 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40704 else if (out_n == 8 && in_n == 4)
40705 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40707 break;
40709 case BUILT_IN_IRINTF:
40710 case BUILT_IN_LRINTF:
40711 case BUILT_IN_LLRINTF:
40712 if (out_mode == SImode && in_mode == SFmode)
40714 if (out_n == 4 && in_n == 4)
40715 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40716 else if (out_n == 8 && in_n == 8)
40717 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40719 break;
40721 case BUILT_IN_IROUND:
40722 case BUILT_IN_LROUND:
40723 case BUILT_IN_LLROUND:
40724 /* The round insn does not trap on denormals. */
40725 if (flag_trapping_math || !TARGET_ROUND)
40726 break;
40728 if (out_mode == SImode && in_mode == DFmode)
40730 if (out_n == 4 && in_n == 2)
40731 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40732 else if (out_n == 8 && in_n == 4)
40733 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40734 else if (out_n == 16 && in_n == 8)
40735 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40737 break;
40739 case BUILT_IN_IROUNDF:
40740 case BUILT_IN_LROUNDF:
40741 case BUILT_IN_LLROUNDF:
40742 /* The round insn does not trap on denormals. */
40743 if (flag_trapping_math || !TARGET_ROUND)
40744 break;
40746 if (out_mode == SImode && in_mode == SFmode)
40748 if (out_n == 4 && in_n == 4)
40749 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40750 else if (out_n == 8 && in_n == 8)
40751 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40753 break;
40755 case BUILT_IN_COPYSIGN:
40756 if (out_mode == DFmode && in_mode == DFmode)
40758 if (out_n == 2 && in_n == 2)
40759 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40760 else if (out_n == 4 && in_n == 4)
40761 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40762 else if (out_n == 8 && in_n == 8)
40763 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40765 break;
40767 case BUILT_IN_COPYSIGNF:
40768 if (out_mode == SFmode && in_mode == SFmode)
40770 if (out_n == 4 && in_n == 4)
40771 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40772 else if (out_n == 8 && in_n == 8)
40773 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40774 else if (out_n == 16 && in_n == 16)
40775 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40777 break;
40779 case BUILT_IN_FLOOR:
40780 /* The round insn does not trap on denormals. */
40781 if (flag_trapping_math || !TARGET_ROUND)
40782 break;
40784 if (out_mode == DFmode && in_mode == DFmode)
40786 if (out_n == 2 && in_n == 2)
40787 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40788 else if (out_n == 4 && in_n == 4)
40789 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40791 break;
40793 case BUILT_IN_FLOORF:
40794 /* The round insn does not trap on denormals. */
40795 if (flag_trapping_math || !TARGET_ROUND)
40796 break;
40798 if (out_mode == SFmode && in_mode == SFmode)
40800 if (out_n == 4 && in_n == 4)
40801 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40802 else if (out_n == 8 && in_n == 8)
40803 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40805 break;
40807 case BUILT_IN_CEIL:
40808 /* The round insn does not trap on denormals. */
40809 if (flag_trapping_math || !TARGET_ROUND)
40810 break;
40812 if (out_mode == DFmode && in_mode == DFmode)
40814 if (out_n == 2 && in_n == 2)
40815 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40816 else if (out_n == 4 && in_n == 4)
40817 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40819 break;
40821 case BUILT_IN_CEILF:
40822 /* The round insn does not trap on denormals. */
40823 if (flag_trapping_math || !TARGET_ROUND)
40824 break;
40826 if (out_mode == SFmode && in_mode == SFmode)
40828 if (out_n == 4 && in_n == 4)
40829 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40830 else if (out_n == 8 && in_n == 8)
40831 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40833 break;
40835 case BUILT_IN_TRUNC:
40836 /* The round insn does not trap on denormals. */
40837 if (flag_trapping_math || !TARGET_ROUND)
40838 break;
40840 if (out_mode == DFmode && in_mode == DFmode)
40842 if (out_n == 2 && in_n == 2)
40843 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40844 else if (out_n == 4 && in_n == 4)
40845 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40847 break;
40849 case BUILT_IN_TRUNCF:
40850 /* The round insn does not trap on denormals. */
40851 if (flag_trapping_math || !TARGET_ROUND)
40852 break;
40854 if (out_mode == SFmode && in_mode == SFmode)
40856 if (out_n == 4 && in_n == 4)
40857 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40858 else if (out_n == 8 && in_n == 8)
40859 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40861 break;
40863 case BUILT_IN_RINT:
40864 /* The round insn does not trap on denormals. */
40865 if (flag_trapping_math || !TARGET_ROUND)
40866 break;
40868 if (out_mode == DFmode && in_mode == DFmode)
40870 if (out_n == 2 && in_n == 2)
40871 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40872 else if (out_n == 4 && in_n == 4)
40873 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40875 break;
40877 case BUILT_IN_RINTF:
40878 /* The round insn does not trap on denormals. */
40879 if (flag_trapping_math || !TARGET_ROUND)
40880 break;
40882 if (out_mode == SFmode && in_mode == SFmode)
40884 if (out_n == 4 && in_n == 4)
40885 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40886 else if (out_n == 8 && in_n == 8)
40887 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40889 break;
40891 case BUILT_IN_ROUND:
40892 /* The round insn does not trap on denormals. */
40893 if (flag_trapping_math || !TARGET_ROUND)
40894 break;
40896 if (out_mode == DFmode && in_mode == DFmode)
40898 if (out_n == 2 && in_n == 2)
40899 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40900 else if (out_n == 4 && in_n == 4)
40901 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40903 break;
40905 case BUILT_IN_ROUNDF:
40906 /* The round insn does not trap on denormals. */
40907 if (flag_trapping_math || !TARGET_ROUND)
40908 break;
40910 if (out_mode == SFmode && in_mode == SFmode)
40912 if (out_n == 4 && in_n == 4)
40913 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40914 else if (out_n == 8 && in_n == 8)
40915 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40917 break;
40919 case BUILT_IN_FMA:
40920 if (out_mode == DFmode && in_mode == DFmode)
40922 if (out_n == 2 && in_n == 2)
40923 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40924 if (out_n == 4 && in_n == 4)
40925 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40927 break;
40929 case BUILT_IN_FMAF:
40930 if (out_mode == SFmode && in_mode == SFmode)
40932 if (out_n == 4 && in_n == 4)
40933 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40934 if (out_n == 8 && in_n == 8)
40935 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40937 break;
40939 default:
40940 break;
40943 /* Dispatch to a handler for a vectorization library. */
40944 if (ix86_veclib_handler)
40945 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40946 type_in);
40948 return NULL_TREE;
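/* Worked example: when the vectorizer queries this hook with
   fn = BUILT_IN_SQRT and V4DF for both TYPE_OUT and TYPE_IN
   (out_mode == DFmode, out_n == 4), the switch above returns the
   decl for IX86_BUILTIN_SQRTPD256, the 256-bit packed-double
   square root. */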
40951 /* Handler for an SVML-style interface to
40952 a library with vectorized intrinsics. */
40954 static tree
40955 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40957 char name[20];
40958 tree fntype, new_fndecl, args;
40959 unsigned arity;
40960 const char *bname;
40961 machine_mode el_mode, in_mode;
40962 int n, in_n;
40964 /* SVML is suitable for unsafe math only. */
40965 if (!flag_unsafe_math_optimizations)
40966 return NULL_TREE;
40968 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40969 n = TYPE_VECTOR_SUBPARTS (type_out);
40970 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40971 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40972 if (el_mode != in_mode
40973 || n != in_n)
40974 return NULL_TREE;
40976 switch (fn)
40978 case BUILT_IN_EXP:
40979 case BUILT_IN_LOG:
40980 case BUILT_IN_LOG10:
40981 case BUILT_IN_POW:
40982 case BUILT_IN_TANH:
40983 case BUILT_IN_TAN:
40984 case BUILT_IN_ATAN:
40985 case BUILT_IN_ATAN2:
40986 case BUILT_IN_ATANH:
40987 case BUILT_IN_CBRT:
40988 case BUILT_IN_SINH:
40989 case BUILT_IN_SIN:
40990 case BUILT_IN_ASINH:
40991 case BUILT_IN_ASIN:
40992 case BUILT_IN_COSH:
40993 case BUILT_IN_COS:
40994 case BUILT_IN_ACOSH:
40995 case BUILT_IN_ACOS:
40996 if (el_mode != DFmode || n != 2)
40997 return NULL_TREE;
40998 break;
41000 case BUILT_IN_EXPF:
41001 case BUILT_IN_LOGF:
41002 case BUILT_IN_LOG10F:
41003 case BUILT_IN_POWF:
41004 case BUILT_IN_TANHF:
41005 case BUILT_IN_TANF:
41006 case BUILT_IN_ATANF:
41007 case BUILT_IN_ATAN2F:
41008 case BUILT_IN_ATANHF:
41009 case BUILT_IN_CBRTF:
41010 case BUILT_IN_SINHF:
41011 case BUILT_IN_SINF:
41012 case BUILT_IN_ASINHF:
41013 case BUILT_IN_ASINF:
41014 case BUILT_IN_COSHF:
41015 case BUILT_IN_COSF:
41016 case BUILT_IN_ACOSHF:
41017 case BUILT_IN_ACOSF:
41018 if (el_mode != SFmode || n != 4)
41019 return NULL_TREE;
41020 break;
41022 default:
41023 return NULL_TREE;
41026 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41028 if (fn == BUILT_IN_LOGF)
41029 strcpy (name, "vmlsLn4");
41030 else if (fn == BUILT_IN_LOG)
41031 strcpy (name, "vmldLn2");
41032 else if (n == 4)
41034 sprintf (name, "vmls%s", bname+10);
41035 name[strlen (name)-1] = '4';
41037 else
41038 sprintf (name, "vmld%s2", bname+10);
41040 /* Convert to uppercase. */
41041 name[4] &= ~0x20;
41043 arity = 0;
41044 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41045 args;
41046 args = TREE_CHAIN (args))
41047 arity++;
41049 if (arity == 1)
41050 fntype = build_function_type_list (type_out, type_in, NULL);
41051 else
41052 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41054 /* Build a function declaration for the vectorized function. */
41055 new_fndecl = build_decl (BUILTINS_LOCATION,
41056 FUNCTION_DECL, get_identifier (name), fntype);
41057 TREE_PUBLIC (new_fndecl) = 1;
41058 DECL_EXTERNAL (new_fndecl) = 1;
41059 DECL_IS_NOVOPS (new_fndecl) = 1;
41060 TREE_READONLY (new_fndecl) = 1;
41062 return new_fndecl;
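/* Name mangling example: for BUILT_IN_SINF with 4-element vectors,
   bname is "__builtin_sinf" and bname+10 is "sinf"; the sprintf
   yields "vmlssinf", the trailing character is rewritten to '4' and
   name[4] is uppercased, producing "vmlsSin4". */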
41065 /* Handler for an ACML-style interface to
41066 a library with vectorized intrinsics. */
41068 static tree
41069 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
41071 char name[20] = "__vr.._";
41072 tree fntype, new_fndecl, args;
41073 unsigned arity;
41074 const char *bname;
41075 machine_mode el_mode, in_mode;
41076 int n, in_n;
41078 /* ACML is 64-bit only and suitable for unsafe math only, as it
41079 does not correctly support parts of IEEE arithmetic with the
41080 required precision, such as denormals. */
41081 if (!TARGET_64BIT
41082 || !flag_unsafe_math_optimizations)
41083 return NULL_TREE;
41085 el_mode = TYPE_MODE (TREE_TYPE (type_out));
41086 n = TYPE_VECTOR_SUBPARTS (type_out);
41087 in_mode = TYPE_MODE (TREE_TYPE (type_in));
41088 in_n = TYPE_VECTOR_SUBPARTS (type_in);
41089 if (el_mode != in_mode
41090 || n != in_n)
41091 return NULL_TREE;
41093 switch (fn)
41095 case BUILT_IN_SIN:
41096 case BUILT_IN_COS:
41097 case BUILT_IN_EXP:
41098 case BUILT_IN_LOG:
41099 case BUILT_IN_LOG2:
41100 case BUILT_IN_LOG10:
41101 name[4] = 'd';
41102 name[5] = '2';
41103 if (el_mode != DFmode
41104 || n != 2)
41105 return NULL_TREE;
41106 break;
41108 case BUILT_IN_SINF:
41109 case BUILT_IN_COSF:
41110 case BUILT_IN_EXPF:
41111 case BUILT_IN_POWF:
41112 case BUILT_IN_LOGF:
41113 case BUILT_IN_LOG2F:
41114 case BUILT_IN_LOG10F:
41115 name[4] = 's';
41116 name[5] = '4';
41117 if (el_mode != SFmode
41118 || n != 4)
41119 return NULL_TREE;
41120 break;
41122 default:
41123 return NULL_TREE;
41126 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41127 sprintf (name + 7, "%s", bname+10);
41129 arity = 0;
41130 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41131 args;
41132 args = TREE_CHAIN (args))
41133 arity++;
41135 if (arity == 1)
41136 fntype = build_function_type_list (type_out, type_in, NULL);
41137 else
41138 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41140 /* Build a function declaration for the vectorized function. */
41141 new_fndecl = build_decl (BUILTINS_LOCATION,
41142 FUNCTION_DECL, get_identifier (name), fntype);
41143 TREE_PUBLIC (new_fndecl) = 1;
41144 DECL_EXTERNAL (new_fndecl) = 1;
41145 DECL_IS_NOVOPS (new_fndecl) = 1;
41146 TREE_READONLY (new_fndecl) = 1;
41148 return new_fndecl;
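/* Name mangling example: for BUILT_IN_SINF the "__vr.._" template
   becomes "__vrs4_" and "sinf" (bname+10) is appended at offset 7,
   giving "__vrs4_sinf", ACML's 4-wide single-precision sine. */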
41151 /* Returns a decl of a function that implements gather load with
41152 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
41153 Return NULL_TREE if it is not available. */
41155 static tree
41156 ix86_vectorize_builtin_gather (const_tree mem_vectype,
41157 const_tree index_type, int scale)
41159 bool si;
41160 enum ix86_builtins code;
41162 if (! TARGET_AVX2)
41163 return NULL_TREE;
41165 if ((TREE_CODE (index_type) != INTEGER_TYPE
41166 && !POINTER_TYPE_P (index_type))
41167 || (TYPE_MODE (index_type) != SImode
41168 && TYPE_MODE (index_type) != DImode))
41169 return NULL_TREE;
41171 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41172 return NULL_TREE;
41174 /* The v*gather* insns sign-extend the index to pointer mode. */
41175 if (TYPE_PRECISION (index_type) < POINTER_SIZE
41176 && TYPE_UNSIGNED (index_type))
41177 return NULL_TREE;
41179 if (scale <= 0
41180 || scale > 8
41181 || (scale & (scale - 1)) != 0)
41182 return NULL_TREE;
41184 si = TYPE_MODE (index_type) == SImode;
41185 switch (TYPE_MODE (mem_vectype))
41187 case V2DFmode:
41188 if (TARGET_AVX512VL)
41189 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41190 else
41191 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41192 break;
41193 case V4DFmode:
41194 if (TARGET_AVX512VL)
41195 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41196 else
41197 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41198 break;
41199 case V2DImode:
41200 if (TARGET_AVX512VL)
41201 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41202 else
41203 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41204 break;
41205 case V4DImode:
41206 if (TARGET_AVX512VL)
41207 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41208 else
41209 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41210 break;
41211 case V4SFmode:
41212 if (TARGET_AVX512VL)
41213 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41214 else
41215 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41216 break;
41217 case V8SFmode:
41218 if (TARGET_AVX512VL)
41219 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41220 else
41221 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41222 break;
41223 case V4SImode:
41224 if (TARGET_AVX512VL)
41225 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41226 else
41227 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41228 break;
41229 case V8SImode:
41230 if (TARGET_AVX512VL)
41231 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41232 else
41233 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41234 break;
41235 case V8DFmode:
41236 if (TARGET_AVX512F)
41237 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41238 else
41239 return NULL_TREE;
41240 break;
41241 case V8DImode:
41242 if (TARGET_AVX512F)
41243 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41244 else
41245 return NULL_TREE;
41246 break;
41247 case V16SFmode:
41248 if (TARGET_AVX512F)
41249 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41250 else
41251 return NULL_TREE;
41252 break;
41253 case V16SImode:
41254 if (TARGET_AVX512F)
41255 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41256 else
41257 return NULL_TREE;
41258 break;
41259 default:
41260 return NULL_TREE;
41263 return ix86_get_builtin (code);
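/* For example, a V4DF gather with SImode indices on an AVX2 target
   without AVX-512VL selects IX86_BUILTIN_GATHERALTSIV4DF, while
   DImode indices select IX86_BUILTIN_GATHERDIV4DF. */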
41266 /* Returns a decl of a function that implements scatter store with
41267 register type VECTYPE and index type INDEX_TYPE and SCALE.
41268 Return NULL_TREE if it is not available. */
41270 static tree
41271 ix86_vectorize_builtin_scatter (const_tree vectype,
41272 const_tree index_type, int scale)
41274 bool si;
41275 enum ix86_builtins code;
41277 if (!TARGET_AVX512F)
41278 return NULL_TREE;
41280 if ((TREE_CODE (index_type) != INTEGER_TYPE
41281 && !POINTER_TYPE_P (index_type))
41282 || (TYPE_MODE (index_type) != SImode
41283 && TYPE_MODE (index_type) != DImode))
41284 return NULL_TREE;
41286 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41287 return NULL_TREE;
41289 /* The v*scatter* insns sign-extend the index to pointer mode. */
41290 if (TYPE_PRECISION (index_type) < POINTER_SIZE
41291 && TYPE_UNSIGNED (index_type))
41292 return NULL_TREE;
41294 /* Scale can be 1, 2, 4 or 8. */
41295 if (scale <= 0
41296 || scale > 8
41297 || (scale & (scale - 1)) != 0)
41298 return NULL_TREE;
41300 si = TYPE_MODE (index_type) == SImode;
41301 switch (TYPE_MODE (vectype))
41303 case V8DFmode:
41304 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
41305 break;
41306 case V8DImode:
41307 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
41308 break;
41309 case V16SFmode:
41310 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
41311 break;
41312 case V16SImode:
41313 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
41314 break;
41315 default:
41316 return NULL_TREE;
41319 return ix86_builtins[code];
41322 /* Returns the decl of a target-specific builtin that implements
41323 the reciprocal of function FN, or NULL_TREE if it is not available. */
41325 static tree
41326 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41328 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41329 && flag_finite_math_only && !flag_trapping_math
41330 && flag_unsafe_math_optimizations))
41331 return NULL_TREE;
41333 if (md_fn)
41334 /* Machine dependent builtins. */
41335 switch (fn)
41337 /* Vectorized version of sqrt to rsqrt conversion. */
41338 case IX86_BUILTIN_SQRTPS_NR:
41339 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41341 case IX86_BUILTIN_SQRTPS_NR256:
41342 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41344 default:
41345 return NULL_TREE;
41347 else
41348 /* Normal builtins. */
41349 switch (fn)
41351 /* Sqrt to rsqrt conversion. */
41352 case BUILT_IN_SQRTF:
41353 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41355 default:
41356 return NULL_TREE;
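/* Illustrative example: under -ffast-math (which implies the flags
   tested above), the vectorized IX86_BUILTIN_SQRTPS_NR is rewritten
   to IX86_BUILTIN_RSQRTPS_NR and a scalar sqrtf to
   IX86_BUILTIN_RSQRTF; the "_NR" names denote the rsqrt-based
   variants refined with a Newton-Raphson step. */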
41360 /* Helper for avx_vpermilps256_operand et al. This is also used by
41361 the expansion functions to turn the parallel back into a mask.
41362 The return value is 0 for no match and the imm8+1 for a match. */
41364 int
41365 avx_vpermilp_parallel (rtx par, machine_mode mode)
41367 unsigned i, nelt = GET_MODE_NUNITS (mode);
41368 unsigned mask = 0;
41369 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41371 if (XVECLEN (par, 0) != (int) nelt)
41372 return 0;
41374 /* Validate that all of the elements are constants, and not totally
41375 out of range. Copy the data into an integral array to make the
41376 subsequent checks easier. */
41377 for (i = 0; i < nelt; ++i)
41379 rtx er = XVECEXP (par, 0, i);
41380 unsigned HOST_WIDE_INT ei;
41382 if (!CONST_INT_P (er))
41383 return 0;
41384 ei = INTVAL (er);
41385 if (ei >= nelt)
41386 return 0;
41387 ipar[i] = ei;
41390 switch (mode)
41392 case V8DFmode:
41393 /* In the 512-bit DFmode case, we can only move elements within
41394 a 128-bit lane. First fill the second part of the mask,
41395 then fallthru. */
41396 for (i = 4; i < 6; ++i)
41398 if (ipar[i] < 4 || ipar[i] >= 6)
41399 return 0;
41400 mask |= (ipar[i] - 4) << i;
41402 for (i = 6; i < 8; ++i)
41404 if (ipar[i] < 6)
41405 return 0;
41406 mask |= (ipar[i] - 6) << i;
41408 /* FALLTHRU */
41410 case V4DFmode:
41411 /* In the 256-bit DFmode case, we can only move elements within
41412 a 128-bit lane. */
41413 for (i = 0; i < 2; ++i)
41415 if (ipar[i] >= 2)
41416 return 0;
41417 mask |= ipar[i] << i;
41419 for (i = 2; i < 4; ++i)
41421 if (ipar[i] < 2)
41422 return 0;
41423 mask |= (ipar[i] - 2) << i;
41425 break;
41427 case V16SFmode:
41428 /* In the 512-bit SFmode case, the permutation in the upper 256
41429 bits must mirror the permutation in the lower 256 bits. */
41430 for (i = 0; i < 8; ++i)
41431 if (ipar[i] + 8 != ipar[i + 8])
41432 return 0;
41433 /* FALLTHRU */
41435 case V8SFmode:
41436 /* In the 256-bit SFmode case, we have full freedom of
41437 movement within the low 128-bit lane, but the high 128-bit
41438 lane must mirror exactly the same pattern. */
41439 for (i = 0; i < 4; ++i)
41440 if (ipar[i] + 4 != ipar[i + 4])
41441 return 0;
41442 nelt = 4;
41443 /* FALLTHRU */
41445 case V2DFmode:
41446 case V4SFmode:
41447 /* In the 128-bit case, we have full freedom in the placement of
41448 the elements from the source operand. */
41449 for (i = 0; i < nelt; ++i)
41450 mask |= ipar[i] << (i * (nelt / 2));
41451 break;
41453 default:
41454 gcc_unreachable ();
41457 /* Make sure success has a non-zero value by adding one. */
41458 return mask + 1;
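/* Worked example: for V4SFmode a parallel of (3 2 1 0) gives
   mask = 3<<0 | 2<<2 | 1<<4 | 0<<6 = 0x1b, the full reversal,
   and the function returns 0x1b + 1 = 0x1c. */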
41461 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41462 the expansion functions to turn the parallel back into a mask.
41463 The return value is 0 for no match and the imm8+1 for a match. */
41465 int
41466 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41468 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41469 unsigned mask = 0;
41470 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41472 if (XVECLEN (par, 0) != (int) nelt)
41473 return 0;
41475 /* Validate that all of the elements are constants, and not totally
41476 out of range. Copy the data into an integral array to make the
41477 subsequent checks easier. */
41478 for (i = 0; i < nelt; ++i)
41480 rtx er = XVECEXP (par, 0, i);
41481 unsigned HOST_WIDE_INT ei;
41483 if (!CONST_INT_P (er))
41484 return 0;
41485 ei = INTVAL (er);
41486 if (ei >= 2 * nelt)
41487 return 0;
41488 ipar[i] = ei;
41491 /* Validate that the halves of the permute are halves. */
41492 for (i = 0; i < nelt2 - 1; ++i)
41493 if (ipar[i] + 1 != ipar[i + 1])
41494 return 0;
41495 for (i = nelt2; i < nelt - 1; ++i)
41496 if (ipar[i] + 1 != ipar[i + 1])
41497 return 0;
41499 /* Reconstruct the mask. */
41500 for (i = 0; i < 2; ++i)
41502 unsigned e = ipar[i * nelt2];
41503 if (e % nelt2)
41504 return 0;
41505 e /= nelt2;
41506 mask |= e << (i * 4);
41509 /* Make sure success has a non-zero value by adding one. */
41510 return mask + 1;
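/* Worked example: for V8SFmode a parallel of (8 9 10 11 0 1 2 3)
   selects the low lane of the second source for the low half
   (e == 2) and the low lane of the first source for the high half
   (e == 0), so mask = 2 | 0<<4 = 0x02 and the return value is 0x03. */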
41513 /* Return a register priority for hard reg REGNO. */
41514 static int
41515 ix86_register_priority (int hard_regno)
41517 /* ebp and r13 as the base always want a displacement, and r12 as
41518 the base always wants an index. So discourage their usage in an
41519 address. */
41520 if (hard_regno == R12_REG || hard_regno == R13_REG)
41521 return 0;
41522 if (hard_regno == BP_REG)
41523 return 1;
41524 /* New x86-64 int registers result in bigger code size. Discourage
41525 them. */
41526 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41527 return 2;
41528 /* New x86-64 SSE registers result in bigger code size. Discourage
41529 them. */
41530 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41531 return 2;
41532 /* Usage of AX register results in smaller code. Prefer it. */
41533 if (hard_regno == AX_REG)
41534 return 4;
41535 return 3;
41538 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41540 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41541 QImode must go into class Q_REGS.
41542 Narrow ALL_REGS to GENERAL_REGS. This supports letting movsf and
41543 movdf do mem-to-mem moves through integer regs. */
41545 static reg_class_t
41546 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41548 machine_mode mode = GET_MODE (x);
41550 /* We're only allowed to return a subclass of CLASS. Many of the
41551 following checks fail for NO_REGS, so eliminate that early. */
41552 if (regclass == NO_REGS)
41553 return NO_REGS;
41555 /* All classes can load zeros. */
41556 if (x == CONST0_RTX (mode))
41557 return regclass;
41559 /* Force constants into memory if we are loading a (nonzero) constant into
41560 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41561 instructions to load from a constant. */
41562 if (CONSTANT_P (x)
41563 && (MAYBE_MMX_CLASS_P (regclass)
41564 || MAYBE_SSE_CLASS_P (regclass)
41565 || MAYBE_MASK_CLASS_P (regclass)))
41566 return NO_REGS;
41568 /* Prefer SSE regs only, if we can use them for math. */
41569 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41570 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41572 /* Floating-point constants need more complex checks. */
41573 if (CONST_DOUBLE_P (x))
41575 /* General regs can load everything. */
41576 if (reg_class_subset_p (regclass, GENERAL_REGS))
41577 return regclass;
41579 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41580 zero above. We only want to wind up preferring 80387 registers if
41581 we plan on doing computation with them. */
41582 if (TARGET_80387
41583 && standard_80387_constant_p (x) > 0)
41585 /* Limit class to non-sse. */
41586 if (regclass == FLOAT_SSE_REGS)
41587 return FLOAT_REGS;
41588 if (regclass == FP_TOP_SSE_REGS)
41589 return FP_TOP_REG;
41590 if (regclass == FP_SECOND_SSE_REGS)
41591 return FP_SECOND_REG;
41592 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41593 return regclass;
41596 return NO_REGS;
41599 /* Generally when we see PLUS here, it's the function invariant
41600 (plus soft-fp const_int), which can only be computed into general
41601 regs. */
41602 if (GET_CODE (x) == PLUS)
41603 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41605 /* QImode constants are easy to load, but non-constant QImode data
41606 must go into Q_REGS. */
41607 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41609 if (reg_class_subset_p (regclass, Q_REGS))
41610 return regclass;
41611 if (reg_class_subset_p (Q_REGS, regclass))
41612 return Q_REGS;
41613 return NO_REGS;
41616 return regclass;
41619 /* Discourage putting floating-point values in SSE registers unless
41620 SSE math is being used, and likewise for the 387 registers. */
41621 static reg_class_t
41622 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41624 machine_mode mode = GET_MODE (x);
41626 /* Restrict the output reload class to the register bank that we are doing
41627 math on. If we would like not to return a subset of CLASS, reject this
41628 alternative: if reload cannot do this, it will still use its choice. */
41630 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41631 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41633 if (X87_FLOAT_MODE_P (mode))
41635 if (regclass == FP_TOP_SSE_REGS)
41636 return FP_TOP_REG;
41637 else if (regclass == FP_SECOND_SSE_REGS)
41638 return FP_SECOND_REG;
41639 else
41640 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41643 return regclass;
41646 static reg_class_t
41647 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41648 machine_mode mode, secondary_reload_info *sri)
41650 /* Double-word spills from general registers to non-offsettable memory
41651 references (zero-extended addresses) require special handling. */
41652 if (TARGET_64BIT
41653 && MEM_P (x)
41654 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41655 && INTEGER_CLASS_P (rclass)
41656 && !offsettable_memref_p (x))
41658 sri->icode = (in_p
41659 ? CODE_FOR_reload_noff_load
41660 : CODE_FOR_reload_noff_store);
41661 /* Add the cost of moving address to a temporary. */
41662 sri->extra_cost = 1;
41664 return NO_REGS;
41667 /* QImode spills from non-QI registers require an
41668 intermediate register on 32-bit targets. */
41669 if (mode == QImode
41670 && (MAYBE_MASK_CLASS_P (rclass)
41671 || (!TARGET_64BIT && !in_p
41672 && INTEGER_CLASS_P (rclass)
41673 && MAYBE_NON_Q_CLASS_P (rclass))))
41675 int regno;
41677 if (REG_P (x))
41678 regno = REGNO (x);
41679 else
41680 regno = -1;
41682 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
41683 regno = true_regnum (x);
41685 /* Return Q_REGS if the operand is in memory. */
41686 if (regno == -1)
41687 return Q_REGS;
41690 /* This condition handles the corner case where an expression involving
41691 pointers gets vectorized. We're trying to use the address of a
41692 stack slot as a vector initializer.
41694 (set (reg:V2DI 74 [ vect_cst_.2 ])
41695 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41697 Eventually frame gets turned into sp+offset like this:
41699 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41700 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41701 (const_int 392 [0x188]))))
41703 That later gets turned into:
41705 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41706 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41707 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41709 We'll have the following reload recorded:
41711 Reload 0: reload_in (DI) =
41712 (plus:DI (reg/f:DI 7 sp)
41713 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41714 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41715 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41716 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41717 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41718 reload_reg_rtx: (reg:V2DI 22 xmm1)
41720 This isn't going to work since SSE instructions can't handle scalar
41721 additions. Returning GENERAL_REGS forces the addition into an integer
41722 register, and reload can handle subsequent reloads without problems. */
41724 if (in_p && GET_CODE (x) == PLUS
41725 && SSE_CLASS_P (rclass)
41726 && SCALAR_INT_MODE_P (mode))
41727 return GENERAL_REGS;
41729 return NO_REGS;
41732 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41734 static bool
41735 ix86_class_likely_spilled_p (reg_class_t rclass)
41737 switch (rclass)
41739 case AREG:
41740 case DREG:
41741 case CREG:
41742 case BREG:
41743 case AD_REGS:
41744 case SIREG:
41745 case DIREG:
41746 case SSE_FIRST_REG:
41747 case FP_TOP_REG:
41748 case FP_SECOND_REG:
41749 case BND_REGS:
41750 return true;
41752 default:
41753 break;
41756 return false;
41759 /* If we are copying between general and FP registers, we need a memory
41760 location. The same is true for SSE and MMX registers.
41762 To optimize register_move_cost performance, allow inline variant.
41764 The macro can't work reliably when one of the CLASSES is a class containing
41765 registers from multiple units (SSE, MMX, integer). We avoid this by never
41766 combining those units in a single alternative in the machine description.
41767 Ensure that this constraint holds to avoid unexpected surprises.
41769 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41770 enforce these sanity checks. */
41772 static inline bool
41773 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41774 machine_mode mode, int strict)
41776 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41777 return false;
41778 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41779 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41780 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41781 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41782 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41783 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41785 gcc_assert (!strict || lra_in_progress);
41786 return true;
41789 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41790 return true;
41792 /* Between mask and general, we have moves no larger than word size. */
41793 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41794 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41795 return true;
41797 /* ??? This is a lie. We do have moves between mmx/general and between
41798 mmx/sse2. But by saying we need secondary memory we discourage the
41799 register allocator from using the mmx registers unless needed. */
41800 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41801 return true;
41803 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41805 /* SSE1 doesn't have any direct moves from other classes. */
41806 if (!TARGET_SSE2)
41807 return true;
41809 /* If the target says that inter-unit moves are more expensive
41810 than moving through memory, then don't generate them. */
41811 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41812 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41813 return true;
41815 /* Between SSE and general, we have moves no larger than word size. */
41816 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41817 return true;
41820 return false;
41823 bool
41824 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41825 machine_mode mode, int strict)
41827 return inline_secondary_memory_needed (class1, class2, mode, strict);
41830 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41832 On the 80386, this is the size of MODE in words,
41833 except in the FP regs, where a single reg is always enough. */
41835 static unsigned char
41836 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41838 if (MAYBE_INTEGER_CLASS_P (rclass))
41840 if (mode == XFmode)
41841 return (TARGET_64BIT ? 2 : 3);
41842 else if (mode == XCmode)
41843 return (TARGET_64BIT ? 4 : 6);
41844 else
41845 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41847 else
41849 if (COMPLEX_MODE_P (mode))
41850 return 2;
41851 else
41852 return 1;
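/* For example, XFmode in the integer registers takes 3 registers
   with 32-bit words (12 bytes) but 2 in 64-bit mode, while XCmode
   in the FP registers takes exactly 2. */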
41856 /* Return true if the registers in REGCLASS cannot represent the change
41857 from mode FROM to mode TO. */
41859 bool
41860 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41861 enum reg_class regclass)
41863 if (from == to)
41864 return false;
41866 /* x87 registers can't do subreg at all, as all values are reformatted
41867 to extended precision. */
41868 if (MAYBE_FLOAT_CLASS_P (regclass))
41869 return true;
41871 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41873 /* Vector registers do not support QI or HImode loads. If we don't
41874 disallow a change to these modes, reload will assume it's ok to
41875 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41876 the vec_dupv4hi pattern. */
41877 if (GET_MODE_SIZE (from) < 4)
41878 return true;
41881 return false;
41884 /* Return the cost of moving data of mode M between a
41885 register and memory. A value of 2 is the default; this cost is
41886 relative to those in `REGISTER_MOVE_COST'.
41888 This function is used extensively by register_move_cost, which is used
41889 to build tables at startup. Make it inline in this case.
41890 When IN is 2, return maximum of in and out move cost.
41892 If moving between registers and memory is more expensive than
41893 between two registers, you should define this macro to express the
41894 relative cost.
41896 Also model the increased cost of moving QImode registers in
41897 non-Q_REGS classes. */
41899 static inline int
41900 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41901 int in)
41903 int cost;
41904 if (FLOAT_CLASS_P (regclass))
41906 int index;
41907 switch (mode)
41909 case SFmode:
41910 index = 0;
41911 break;
41912 case DFmode:
41913 index = 1;
41914 break;
41915 case XFmode:
41916 index = 2;
41917 break;
41918 default:
41919 return 100;
41921 if (in == 2)
41922 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41923 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41925 if (SSE_CLASS_P (regclass))
41927 int index;
41928 switch (GET_MODE_SIZE (mode))
41930 case 4:
41931 index = 0;
41932 break;
41933 case 8:
41934 index = 1;
41935 break;
41936 case 16:
41937 index = 2;
41938 break;
41939 default:
41940 return 100;
41942 if (in == 2)
41943 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41944 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41946 if (MMX_CLASS_P (regclass))
41948 int index;
41949 switch (GET_MODE_SIZE (mode))
41951 case 4:
41952 index = 0;
41953 break;
41954 case 8:
41955 index = 1;
41956 break;
41957 default:
41958 return 100;
41960 if (in == 2)
41961 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41962 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41964 switch (GET_MODE_SIZE (mode))
41966 case 1:
41967 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41969 if (!in)
41970 return ix86_cost->int_store[0];
41971 if (TARGET_PARTIAL_REG_DEPENDENCY
41972 && optimize_function_for_speed_p (cfun))
41973 cost = ix86_cost->movzbl_load;
41974 else
41975 cost = ix86_cost->int_load[0];
41976 if (in == 2)
41977 return MAX (cost, ix86_cost->int_store[0]);
41978 return cost;
41980 else
41982 if (in == 2)
41983 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41984 if (in)
41985 return ix86_cost->movzbl_load;
41986 else
41987 return ix86_cost->int_store[0] + 4;
41989 break;
41990 case 2:
41991 if (in == 2)
41992 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41993 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41994 default:
41995 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41996 if (mode == TFmode)
41997 mode = XFmode;
41998 if (in == 2)
41999 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
42000 else if (in)
42001 cost = ix86_cost->int_load[2];
42002 else
42003 cost = ix86_cost->int_store[2];
42004 return (cost * (((int) GET_MODE_SIZE (mode)
42005 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
42009 static int
42010 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
42011 bool in)
42013 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
42017 /* Return the cost of moving data from a register in class CLASS1 to
42018 one in class CLASS2.
42020 It is not required that the cost always equal 2 when FROM is the same as TO;
42021 on some machines it is expensive to move between registers if they are not
42022 general registers. */
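/* For example, following the logic below, a move between SSE or MMX and
   integer registers costs at least MAX (8, mmxsse_to_integer), while a
   move within the general registers falls through to the default cost
   of 2.  */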
42024 static int
42025 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
42026 reg_class_t class2_i)
42028 enum reg_class class1 = (enum reg_class) class1_i;
42029 enum reg_class class2 = (enum reg_class) class2_i;
42031 /* In case we require secondary memory, compute the cost of the store
42032 followed by the load. To avoid bad register allocation choices, we
42033 need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
42035 if (inline_secondary_memory_needed (class1, class2, mode, 0))
42037 int cost = 1;
42039 cost += inline_memory_move_cost (mode, class1, 2);
42040 cost += inline_memory_move_cost (mode, class2, 2);
42042 /* In the case of copying from a general purpose register we may emit
42043 multiple stores followed by a single load, causing a memory size
42044 mismatch stall. Count this as an arbitrarily high cost of 20. */
42045 if (targetm.class_max_nregs (class1, mode)
42046 > targetm.class_max_nregs (class2, mode))
42047 cost += 20;
42049 /* In the case of FP/MMX moves, the registers actually overlap, and we
42050 have to switch modes in order to treat them differently. */
42051 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
42052 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
42053 cost += 20;
42055 return cost;
42058 /* Moves between SSE/MMX and integer unit are expensive. */
42059 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
42060 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
42062 /* ??? By keeping the returned value relatively high, we limit the
42063 number of moves between integer and MMX/SSE registers for all targets.
42064 Additionally, the high value prevents a problem with x86_modes_tieable_p (),
42065 where integer modes in MMX/SSE registers are not tieable
42066 because of missing QImode and HImode moves to, from, or between
42067 MMX/SSE registers. */
42068 return MAX (8, ix86_cost->mmxsse_to_integer);
42070 if (MAYBE_FLOAT_CLASS_P (class1))
42071 return ix86_cost->fp_move;
42072 if (MAYBE_SSE_CLASS_P (class1))
42073 return ix86_cost->sse_move;
42074 if (MAYBE_MMX_CLASS_P (class1))
42075 return ix86_cost->mmx_move;
42076 return 2;
42079 /* Return TRUE if hard register REGNO can hold a value of machine-mode
42080 MODE. */
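/* For example, under the checks below CCmode is accepted only in the
   flags register, V4SFmode requires an SSE register, and with
   TARGET_AVX512F any 512-bit vector mode is allowed in any SSE
   register, including xmm16-xmm31.  */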
42082 bool
42083 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
42085 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
42086 if (CC_REGNO_P (regno))
42087 return GET_MODE_CLASS (mode) == MODE_CC;
42088 if (GET_MODE_CLASS (mode) == MODE_CC
42089 || GET_MODE_CLASS (mode) == MODE_RANDOM
42090 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
42091 return false;
42092 if (STACK_REGNO_P (regno))
42093 return VALID_FP_MODE_P (mode);
42094 if (MASK_REGNO_P (regno))
42095 return (VALID_MASK_REG_MODE (mode)
42096 || (TARGET_AVX512BW
42097 && VALID_MASK_AVX512BW_MODE (mode)));
42098 if (BND_REGNO_P (regno))
42099 return VALID_BND_REG_MODE (mode);
42100 if (SSE_REGNO_P (regno))
42102 /* We implement the move patterns for all vector modes into and
42103 out of SSE registers, even when no operation instructions
42104 are available. */
42106 /* For AVX-512 we allow, regardless of regno:
42107 - XI mode
42108 - any of 512-bit wide vector mode
42109 - any scalar mode. */
42110 if (TARGET_AVX512F
42111 && (mode == XImode
42112 || VALID_AVX512F_REG_MODE (mode)
42113 || VALID_AVX512F_SCALAR_MODE (mode)))
42114 return true;
42116 /* TODO check for QI/HI scalars. */
42117 /* AVX512VL allows SSE registers 16+ (xmm16-xmm31) for 128/256-bit modes. */
42118 if (TARGET_AVX512VL
42119 && (mode == OImode
42120 || mode == TImode
42121 || VALID_AVX256_REG_MODE (mode)
42122 || VALID_AVX512VL_128_REG_MODE (mode)))
42123 return true;
42125 /* xmm16-xmm31 are only available for AVX-512. */
42126 if (EXT_REX_SSE_REGNO_P (regno))
42127 return false;
42129 /* OImode and AVX modes are available only when AVX is enabled. */
42130 return ((TARGET_AVX
42131 && VALID_AVX256_REG_OR_OI_MODE (mode))
42132 || VALID_SSE_REG_MODE (mode)
42133 || VALID_SSE2_REG_MODE (mode)
42134 || VALID_MMX_REG_MODE (mode)
42135 || VALID_MMX_REG_MODE_3DNOW (mode));
42137 if (MMX_REGNO_P (regno))
42139 /* We implement the move patterns for 3DNOW modes even in MMX mode,
42140 so if the register is available at all, then we can move data of
42141 the given mode into or out of it. */
42142 return (VALID_MMX_REG_MODE (mode)
42143 || VALID_MMX_REG_MODE_3DNOW (mode));
42146 if (mode == QImode)
42148 /* Take care with QImode values: they can live in non-QI regs,
42149 but then they do cause partial register stalls. */
42150 if (ANY_QI_REGNO_P (regno))
42151 return true;
42152 if (!TARGET_PARTIAL_REG_STALL)
42153 return true;
42154 /* LRA checks if the hard register is OK for the given mode.
42155 QImode values can live in non-QI regs, so we allow all
42156 registers here. */
42157 if (lra_in_progress)
42158 return true;
42159 return !can_create_pseudo_p ();
42161 /* We handle both integer and floats in the general purpose registers. */
42162 else if (VALID_INT_MODE_P (mode))
42163 return true;
42164 else if (VALID_FP_MODE_P (mode))
42165 return true;
42166 else if (VALID_DFP_MODE_P (mode))
42167 return true;
42168 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
42169 on to use that value in smaller contexts, this can easily force a
42170 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
42171 supporting DImode, allow it. */
42172 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
42173 return true;
42175 return false;
42178 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
42179 tieable integer mode. */
42181 static bool
42182 ix86_tieable_integer_mode_p (machine_mode mode)
42184 switch (mode)
42186 case HImode:
42187 case SImode:
42188 return true;
42190 case QImode:
42191 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
42193 case DImode:
42194 return TARGET_64BIT;
42196 default:
42197 return false;
42201 /* Return true if MODE1 is accessible in a register that can hold MODE2
42202 without copying. That is, all register classes that can hold MODE2
42203 can also hold MODE1. */
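/* For example, HImode and SImode tie with each other (both are tieable
   integer modes), SFmode ties with DFmode and XFmode, but XFmode never
   ties with TFmode.  */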
42205 bool
42206 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
42208 if (mode1 == mode2)
42209 return true;
42211 if (ix86_tieable_integer_mode_p (mode1)
42212 && ix86_tieable_integer_mode_p (mode2))
42213 return true;
42215 /* MODE2 being XFmode implies fp stack or general regs, which means we
42216 can tie any smaller floating point modes to it. Note that we do not
42217 tie this with TFmode. */
42218 if (mode2 == XFmode)
42219 return mode1 == SFmode || mode1 == DFmode;
42221 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42222 that we can tie it with SFmode. */
42223 if (mode2 == DFmode)
42224 return mode1 == SFmode;
42226 /* If MODE2 is only appropriate for an SSE register, then tie with
42227 any other mode acceptable to SSE registers. */
42228 if (GET_MODE_SIZE (mode2) == 32
42229 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42230 return (GET_MODE_SIZE (mode1) == 32
42231 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42232 if (GET_MODE_SIZE (mode2) == 16
42233 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42234 return (GET_MODE_SIZE (mode1) == 16
42235 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42237 /* If MODE2 is appropriate for an MMX register, then tie
42238 with any other mode acceptable to MMX registers. */
42239 if (GET_MODE_SIZE (mode2) == 8
42240 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42241 return (GET_MODE_SIZE (mode1) == 8
42242 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42244 return false;
42247 /* Return the cost of moving between two registers of mode MODE. */
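/* A sketch of the computation below: an SImode move costs
   COSTS_N_INSNS (1), while a DImode move on a 32-bit target
   (UNITS_PER_WORD of 4) is done in (8 + 4 - 1) / 4 = 2 word-sized
   pieces and therefore costs COSTS_N_INSNS (2).  */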
42249 static int
42250 ix86_set_reg_reg_cost (machine_mode mode)
42252 unsigned int units = UNITS_PER_WORD;
42254 switch (GET_MODE_CLASS (mode))
42256 default:
42257 break;
42259 case MODE_CC:
42260 units = GET_MODE_SIZE (CCmode);
42261 break;
42263 case MODE_FLOAT:
42264 if ((TARGET_SSE && mode == TFmode)
42265 || (TARGET_80387 && mode == XFmode)
42266 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42267 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42268 units = GET_MODE_SIZE (mode);
42269 break;
42271 case MODE_COMPLEX_FLOAT:
42272 if ((TARGET_SSE && mode == TCmode)
42273 || (TARGET_80387 && mode == XCmode)
42274 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42275 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42276 units = GET_MODE_SIZE (mode);
42277 break;
42279 case MODE_VECTOR_INT:
42280 case MODE_VECTOR_FLOAT:
42281 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42282 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42283 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42284 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42285 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42286 units = GET_MODE_SIZE (mode);
42289 /* Return the cost of moving between two registers of mode MODE,
42290 assuming that the move will be in pieces of at most UNITS bytes. */
42291 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42294 /* Compute a (partial) cost for rtx X. Return true if the complete
42295 cost has been computed, and false if subexpressions should be
42296 scanned. In either case, *TOTAL contains the cost result. */
42298 static bool
42299 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
42300 int *total, bool speed)
42302 rtx mask;
42303 enum rtx_code code = GET_CODE (x);
42304 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42305 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42307 switch (code)
42309 case SET:
42310 if (register_operand (SET_DEST (x), VOIDmode)
42311 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42313 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42314 return true;
42316 return false;
42318 case CONST_INT:
42319 case CONST:
42320 case LABEL_REF:
42321 case SYMBOL_REF:
42322 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42323 *total = 3;
42324 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42325 *total = 2;
42326 else if (flag_pic && SYMBOLIC_CONST (x)
42327 && !(TARGET_64BIT
42328 && (GET_CODE (x) == LABEL_REF
42329 || (GET_CODE (x) == SYMBOL_REF
42330 && SYMBOL_REF_LOCAL_P (x))))
42331 /* Use 0 cost for CONST to improve its propagation. */
42332 && (TARGET_64BIT || GET_CODE (x) != CONST))
42333 *total = 1;
42334 else
42335 *total = 0;
42336 return true;
42338 case CONST_WIDE_INT:
42339 *total = 0;
42340 return true;
42342 case CONST_DOUBLE:
42343 switch (standard_80387_constant_p (x))
42345 case 1: /* 0.0 */
42346 *total = 1;
42347 return true;
42348 default: /* Other constants */
42349 *total = 2;
42350 return true;
42351 case 0:
42352 case -1:
42353 break;
42355 if (SSE_FLOAT_MODE_P (mode))
42357 case CONST_VECTOR:
42358 switch (standard_sse_constant_p (x))
42360 case 0:
42361 break;
42362 case 1: /* 0: xor eliminates false dependency */
42363 *total = 0;
42364 return true;
42365 default: /* -1: cmp contains false dependency */
42366 *total = 1;
42367 return true;
42370 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42371 it'll probably end up. Add a penalty for size. */
42372 *total = (COSTS_N_INSNS (1)
42373 + (flag_pic != 0 && !TARGET_64BIT)
42374 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42375 return true;
42377 case ZERO_EXTEND:
42378 /* The zero extension is often completely free on x86_64, so make
42379 it as cheap as possible. */
42380 if (TARGET_64BIT && mode == DImode
42381 && GET_MODE (XEXP (x, 0)) == SImode)
42382 *total = 1;
42383 else if (TARGET_ZERO_EXTEND_WITH_AND)
42384 *total = cost->add;
42385 else
42386 *total = cost->movzx;
42387 return false;
42389 case SIGN_EXTEND:
42390 *total = cost->movsx;
42391 return false;
42393 case ASHIFT:
42394 if (SCALAR_INT_MODE_P (mode)
42395 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42396 && CONST_INT_P (XEXP (x, 1)))
42398 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42399 if (value == 1)
42401 *total = cost->add;
42402 return false;
42404 if ((value == 2 || value == 3)
42405 && cost->lea <= cost->shift_const)
42407 *total = cost->lea;
42408 return false;
42411 /* FALLTHRU */
42413 case ROTATE:
42414 case ASHIFTRT:
42415 case LSHIFTRT:
42416 case ROTATERT:
42417 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42419 /* ??? Should be SSE vector operation cost. */
42420 /* At least for published AMD latencies, this really is the same
42421 as the latency for a simple fpu operation like fabs. */
42422 /* V*QImode is emulated with 1-11 insns. */
42423 if (mode == V16QImode || mode == V32QImode)
42425 int count = 11;
42426 if (TARGET_XOP && mode == V16QImode)
42428 /* For XOP we use vpshab, which requires a broadcast of the
42429 value to the variable shift insn. For constants this
42430 means a V16QImode constant in memory; even when we can perform
42431 the shift with one insn, set the cost to prefer paddb. */
42432 if (CONSTANT_P (XEXP (x, 1)))
42434 *total = (cost->fabs
42435 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
42436 + (speed ? 2 : COSTS_N_BYTES (16)));
42437 return true;
42439 count = 3;
42441 else if (TARGET_SSSE3)
42442 count = 7;
42443 *total = cost->fabs * count;
42445 else
42446 *total = cost->fabs;
42448 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42450 if (CONST_INT_P (XEXP (x, 1)))
42452 if (INTVAL (XEXP (x, 1)) > 32)
42453 *total = cost->shift_const + COSTS_N_INSNS (2);
42454 else
42455 *total = cost->shift_const * 2;
42457 else
42459 if (GET_CODE (XEXP (x, 1)) == AND)
42460 *total = cost->shift_var * 2;
42461 else
42462 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42465 else
42467 if (CONST_INT_P (XEXP (x, 1)))
42468 *total = cost->shift_const;
42469 else if (SUBREG_P (XEXP (x, 1))
42470 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42472 /* Return the cost after shift-and truncation. */
42473 *total = cost->shift_var;
42474 return true;
42476 else
42477 *total = cost->shift_var;
42479 return false;
42481 case FMA:
42483 rtx sub;
42485 gcc_assert (FLOAT_MODE_P (mode));
42486 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42488 /* ??? SSE scalar/vector cost should be used here. */
42489 /* ??? Bald assumption that fma has the same cost as fmul. */
42490 *total = cost->fmul;
42491 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
42493 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42494 sub = XEXP (x, 0);
42495 if (GET_CODE (sub) == NEG)
42496 sub = XEXP (sub, 0);
42497 *total += rtx_cost (sub, mode, FMA, 0, speed);
42499 sub = XEXP (x, 2);
42500 if (GET_CODE (sub) == NEG)
42501 sub = XEXP (sub, 0);
42502 *total += rtx_cost (sub, mode, FMA, 2, speed);
42503 return true;
42506 case MULT:
42507 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42509 /* ??? SSE scalar cost should be used here. */
42510 *total = cost->fmul;
42511 return false;
42513 else if (X87_FLOAT_MODE_P (mode))
42515 *total = cost->fmul;
42516 return false;
42518 else if (FLOAT_MODE_P (mode))
42520 /* ??? SSE vector cost should be used here. */
42521 *total = cost->fmul;
42522 return false;
42524 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42526 /* V*QImode is emulated with 7-13 insns. */
42527 if (mode == V16QImode || mode == V32QImode)
42529 int extra = 11;
42530 if (TARGET_XOP && mode == V16QImode)
42531 extra = 5;
42532 else if (TARGET_SSSE3)
42533 extra = 6;
42534 *total = cost->fmul * 2 + cost->fabs * extra;
42536 /* V*DImode is emulated with 5-8 insns. */
42537 else if (mode == V2DImode || mode == V4DImode)
42539 if (TARGET_XOP && mode == V2DImode)
42540 *total = cost->fmul * 2 + cost->fabs * 3;
42541 else
42542 *total = cost->fmul * 3 + cost->fabs * 5;
42544 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42545 insns, including two PMULUDQ. */
42546 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42547 *total = cost->fmul * 2 + cost->fabs * 5;
42548 else
42549 *total = cost->fmul;
42550 return false;
42552 else
42554 rtx op0 = XEXP (x, 0);
42555 rtx op1 = XEXP (x, 1);
42556 int nbits;
42557 if (CONST_INT_P (XEXP (x, 1)))
42559 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42560 for (nbits = 0; value != 0; value &= value - 1)
42561 nbits++;
42563 else
42564 /* This is arbitrary. */
42565 nbits = 7;
42567 /* Compute costs correctly for widening multiplication. */
42568 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42569 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42570 == GET_MODE_SIZE (mode))
42572 int is_mulwiden = 0;
42573 machine_mode inner_mode = GET_MODE (op0);
42575 if (GET_CODE (op0) == GET_CODE (op1))
42576 is_mulwiden = 1, op1 = XEXP (op1, 0);
42577 else if (CONST_INT_P (op1))
42579 if (GET_CODE (op0) == SIGN_EXTEND)
42580 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42581 == INTVAL (op1);
42582 else
42583 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42586 if (is_mulwiden)
42587 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42590 *total = (cost->mult_init[MODE_INDEX (mode)]
42591 + nbits * cost->mult_bit
42592 + rtx_cost (op0, mode, outer_code, opno, speed)
42593 + rtx_cost (op1, mode, outer_code, opno, speed));
42595 return true;
42598 case DIV:
42599 case UDIV:
42600 case MOD:
42601 case UMOD:
42602 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42603 /* ??? SSE cost should be used here. */
42604 *total = cost->fdiv;
42605 else if (X87_FLOAT_MODE_P (mode))
42606 *total = cost->fdiv;
42607 else if (FLOAT_MODE_P (mode))
42608 /* ??? SSE vector cost should be used here. */
42609 *total = cost->fdiv;
42610 else
42611 *total = cost->divide[MODE_INDEX (mode)];
42612 return false;
42614 case PLUS:
42615 if (GET_MODE_CLASS (mode) == MODE_INT
42616 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42618 if (GET_CODE (XEXP (x, 0)) == PLUS
42619 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42620 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42621 && CONSTANT_P (XEXP (x, 1)))
42623 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42624 if (val == 2 || val == 4 || val == 8)
42626 *total = cost->lea;
42627 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
42628 outer_code, opno, speed);
42629 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
42630 outer_code, opno, speed);
42631 *total += rtx_cost (XEXP (x, 1), mode,
42632 outer_code, opno, speed);
42633 return true;
42636 else if (GET_CODE (XEXP (x, 0)) == MULT
42637 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42639 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42640 if (val == 2 || val == 4 || val == 8)
42642 *total = cost->lea;
42643 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
42644 outer_code, opno, speed);
42645 *total += rtx_cost (XEXP (x, 1), mode,
42646 outer_code, opno, speed);
42647 return true;
42650 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42652 *total = cost->lea;
42653 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
42654 outer_code, opno, speed);
42655 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
42656 outer_code, opno, speed);
42657 *total += rtx_cost (XEXP (x, 1), mode,
42658 outer_code, opno, speed);
42659 return true;
42662 /* FALLTHRU */
42664 case MINUS:
42665 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42667 /* ??? SSE cost should be used here. */
42668 *total = cost->fadd;
42669 return false;
42671 else if (X87_FLOAT_MODE_P (mode))
42673 *total = cost->fadd;
42674 return false;
42676 else if (FLOAT_MODE_P (mode))
42678 /* ??? SSE vector cost should be used here. */
42679 *total = cost->fadd;
42680 return false;
42682 /* FALLTHRU */
42684 case AND:
42685 case IOR:
42686 case XOR:
42687 if (GET_MODE_CLASS (mode) == MODE_INT
42688 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42690 *total = (cost->add * 2
42691 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
42692 << (GET_MODE (XEXP (x, 0)) != DImode))
42693 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
42694 << (GET_MODE (XEXP (x, 1)) != DImode)));
42695 return true;
42697 /* FALLTHRU */
42699 case NEG:
42700 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42702 /* ??? SSE cost should be used here. */
42703 *total = cost->fchs;
42704 return false;
42706 else if (X87_FLOAT_MODE_P (mode))
42708 *total = cost->fchs;
42709 return false;
42711 else if (FLOAT_MODE_P (mode))
42713 /* ??? SSE vector cost should be used here. */
42714 *total = cost->fchs;
42715 return false;
42717 /* FALLTHRU */
42719 case NOT:
42720 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42722 /* ??? Should be SSE vector operation cost. */
42723 /* At least for published AMD latencies, this really is the same
42724 as the latency for a simple fpu operation like fabs. */
42725 *total = cost->fabs;
42727 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42728 *total = cost->add * 2;
42729 else
42730 *total = cost->add;
42731 return false;
42733 case COMPARE:
42734 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42735 && XEXP (XEXP (x, 0), 1) == const1_rtx
42736 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42737 && XEXP (x, 1) == const0_rtx)
42739 /* This kind of construct is implemented using test[bwl].
42740 Treat it as if we had an AND. */
42741 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
42742 *total = (cost->add
42743 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
42744 opno, speed)
42745 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
42746 return true;
42749 /* The embedded comparison operand is completely free. */
42750 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
42751 && XEXP (x, 1) == const0_rtx)
42752 *total = 0;
42754 return false;
42756 case FLOAT_EXTEND:
42757 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42758 *total = 0;
42759 return false;
42761 case ABS:
42762 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42763 /* ??? SSE cost should be used here. */
42764 *total = cost->fabs;
42765 else if (X87_FLOAT_MODE_P (mode))
42766 *total = cost->fabs;
42767 else if (FLOAT_MODE_P (mode))
42768 /* ??? SSE vector cost should be used here. */
42769 *total = cost->fabs;
42770 return false;
42772 case SQRT:
42773 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42774 /* ??? SSE cost should be used here. */
42775 *total = cost->fsqrt;
42776 else if (X87_FLOAT_MODE_P (mode))
42777 *total = cost->fsqrt;
42778 else if (FLOAT_MODE_P (mode))
42779 /* ??? SSE vector cost should be used here. */
42780 *total = cost->fsqrt;
42781 return false;
42783 case UNSPEC:
42784 if (XINT (x, 1) == UNSPEC_TP)
42785 *total = 0;
42786 return false;
42788 case VEC_SELECT:
42789 case VEC_CONCAT:
42790 case VEC_DUPLICATE:
42791 /* ??? Assume all of these vector manipulation patterns are
42792 recognizable, in which case they all pretty much have the
42793 same cost. */
42794 *total = cost->fabs;
42795 return true;
42796 case VEC_MERGE:
42797 mask = XEXP (x, 2);
42798 /* This is a masked instruction; assume the same cost
42799 as the non-masked variant. */
42800 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42801 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
42802 else
42803 *total = cost->fabs;
42804 return true;
42806 default:
42807 return false;
42811 #if TARGET_MACHO
42813 static int current_machopic_label_num;
42815 /* Given a symbol name and its associated stub, write out the
42816 definition of the stub. */
42818 void
42819 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42821 unsigned int length;
42822 char *binder_name, *symbol_name, lazy_ptr_name[32];
42823 int label = ++current_machopic_label_num;
42825 /* For 64-bit we shouldn't get here. */
42826 gcc_assert (!TARGET_64BIT);
42828 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42829 symb = targetm.strip_name_encoding (symb);
42831 length = strlen (stub);
42832 binder_name = XALLOCAVEC (char, length + 32);
42833 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42835 length = strlen (symb);
42836 symbol_name = XALLOCAVEC (char, length + 32);
42837 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42839 sprintf (lazy_ptr_name, "L%d$lz", label);
42841 if (MACHOPIC_ATT_STUB)
42842 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42843 else if (MACHOPIC_PURE)
42844 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42845 else
42846 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42848 fprintf (file, "%s:\n", stub);
42849 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42851 if (MACHOPIC_ATT_STUB)
42853 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42855 else if (MACHOPIC_PURE)
42857 /* PIC stub. */
42858 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42859 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42860 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42861 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42862 label, lazy_ptr_name, label);
42863 fprintf (file, "\tjmp\t*%%ecx\n");
42865 else
42866 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42868 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42869 it needs no stub-binding-helper. */
42870 if (MACHOPIC_ATT_STUB)
42871 return;
42873 fprintf (file, "%s:\n", binder_name);
42875 if (MACHOPIC_PURE)
42877 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42878 fprintf (file, "\tpushl\t%%ecx\n");
42880 else
42881 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42883 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42885 /* N.B. Keep the correspondence of these
42886 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42887 old-pic/new-pic/non-pic stubs; altering this will break
42888 compatibility with existing dylibs. */
42889 if (MACHOPIC_PURE)
42891 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42892 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42894 else
42895 /* 16-byte -mdynamic-no-pic stub. */
42896 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42898 fprintf (file, "%s:\n", lazy_ptr_name);
42899 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42900 fprintf (file, ASM_LONG "%s\n", binder_name);
42902 #endif /* TARGET_MACHO */
42904 /* Order the registers for the register allocator. */
42906 void
42907 x86_order_regs_for_local_alloc (void)
42909 int pos = 0;
42910 int i;
42912 /* First allocate the local general purpose registers. */
42913 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42914 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42915 reg_alloc_order [pos++] = i;
42917 /* Global general purpose registers. */
42918 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42919 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42920 reg_alloc_order [pos++] = i;
42922 /* x87 registers come first in case we are doing FP math
42923 using them. */
42924 if (!TARGET_SSE_MATH)
42925 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42926 reg_alloc_order [pos++] = i;
42928 /* SSE registers. */
42929 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42930 reg_alloc_order [pos++] = i;
42931 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42932 reg_alloc_order [pos++] = i;
42934 /* Extended REX SSE registers. */
42935 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42936 reg_alloc_order [pos++] = i;
42938 /* Mask register. */
42939 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42940 reg_alloc_order [pos++] = i;
42942 /* MPX bound registers. */
42943 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42944 reg_alloc_order [pos++] = i;
42946 /* x87 registers. */
42947 if (TARGET_SSE_MATH)
42948 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42949 reg_alloc_order [pos++] = i;
42951 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42952 reg_alloc_order [pos++] = i;
42954 /* Initialize the rest of the array, as we do not allocate some
42955 registers at all. */
42956 while (pos < FIRST_PSEUDO_REGISTER)
42957 reg_alloc_order [pos++] = 0;
42960 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42961 in struct attribute_spec.handler. */
42962 static tree
42963 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42964 tree args,
42965 int,
42966 bool *no_add_attrs)
42968 if (TREE_CODE (*node) != FUNCTION_TYPE
42969 && TREE_CODE (*node) != METHOD_TYPE
42970 && TREE_CODE (*node) != FIELD_DECL
42971 && TREE_CODE (*node) != TYPE_DECL)
42973 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42974 name);
42975 *no_add_attrs = true;
42976 return NULL_TREE;
42978 if (TARGET_64BIT)
42980 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42981 name);
42982 *no_add_attrs = true;
42983 return NULL_TREE;
42985 if (is_attribute_p ("callee_pop_aggregate_return", name))
42987 tree cst;
42989 cst = TREE_VALUE (args);
42990 if (TREE_CODE (cst) != INTEGER_CST)
42992 warning (OPT_Wattributes,
42993 "%qE attribute requires an integer constant argument",
42994 name);
42995 *no_add_attrs = true;
42997 else if (compare_tree_int (cst, 0) != 0
42998 && compare_tree_int (cst, 1) != 0)
43000 warning (OPT_Wattributes,
43001 "argument to %qE attribute is neither zero, nor one",
43002 name);
43003 *no_add_attrs = true;
43006 return NULL_TREE;
43009 return NULL_TREE;
43012 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
43013 struct attribute_spec.handler. */
43014 static tree
43015 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
43016 bool *no_add_attrs)
43018 if (TREE_CODE (*node) != FUNCTION_TYPE
43019 && TREE_CODE (*node) != METHOD_TYPE
43020 && TREE_CODE (*node) != FIELD_DECL
43021 && TREE_CODE (*node) != TYPE_DECL)
43023 warning (OPT_Wattributes, "%qE attribute only applies to functions",
43024 name);
43025 *no_add_attrs = true;
43026 return NULL_TREE;
43029 /* Can combine regparm with all attributes but fastcall. */
43030 if (is_attribute_p ("ms_abi", name))
43032 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
43034 error ("ms_abi and sysv_abi attributes are not compatible");
43037 return NULL_TREE;
43039 else if (is_attribute_p ("sysv_abi", name))
43041 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
43043 error ("ms_abi and sysv_abi attributes are not compatible");
43046 return NULL_TREE;
43049 return NULL_TREE;
43052 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
43053 struct attribute_spec.handler. */
43054 static tree
43055 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
43056 bool *no_add_attrs)
43058 tree *type = NULL;
43059 if (DECL_P (*node))
43061 if (TREE_CODE (*node) == TYPE_DECL)
43062 type = &TREE_TYPE (*node);
43064 else
43065 type = node;
43067 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
43069 warning (OPT_Wattributes, "%qE attribute ignored",
43070 name);
43071 *no_add_attrs = true;
43074 else if ((is_attribute_p ("ms_struct", name)
43075 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
43076 || ((is_attribute_p ("gcc_struct", name)
43077 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
43079 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
43080 name);
43081 *no_add_attrs = true;
43084 return NULL_TREE;
43087 static tree
43088 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
43089 bool *no_add_attrs)
43091 if (TREE_CODE (*node) != FUNCTION_DECL)
43093 warning (OPT_Wattributes, "%qE attribute only applies to functions",
43094 name);
43095 *no_add_attrs = true;
43097 return NULL_TREE;
43100 static bool
43101 ix86_ms_bitfield_layout_p (const_tree record_type)
43103 return ((TARGET_MS_BITFIELD_LAYOUT
43104 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
43105 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
43108 /* Returns an expression indicating where the this parameter is
43109 located on entry to the FUNCTION. */
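/* For example, on 64-bit SysV targets `this' arrives in %rdi, or in
   %rsi when a hidden aggregate-return pointer occupies the first
   parameter register (the parm_regs[aggr] indexing below).  */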
43111 static rtx
43112 x86_this_parameter (tree function)
43114 tree type = TREE_TYPE (function);
43115 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
43116 int nregs;
43118 if (TARGET_64BIT)
43120 const int *parm_regs;
43122 if (ix86_function_type_abi (type) == MS_ABI)
43123 parm_regs = x86_64_ms_abi_int_parameter_registers;
43124 else
43125 parm_regs = x86_64_int_parameter_registers;
43126 return gen_rtx_REG (Pmode, parm_regs[aggr]);
43129 nregs = ix86_function_regparm (type, function);
43131 if (nregs > 0 && !stdarg_p (type))
43133 int regno;
43134 unsigned int ccvt = ix86_get_callcvt (type);
43136 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43137 regno = aggr ? DX_REG : CX_REG;
43138 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43140 regno = CX_REG;
43141 if (aggr)
43142 return gen_rtx_MEM (SImode,
43143 plus_constant (Pmode, stack_pointer_rtx, 4));
43145 else
43147 regno = AX_REG;
43148 if (aggr)
43150 regno = DX_REG;
43151 if (nregs == 1)
43152 return gen_rtx_MEM (SImode,
43153 plus_constant (Pmode,
43154 stack_pointer_rtx, 4));
43157 return gen_rtx_REG (SImode, regno);
43160 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
43161 aggr ? 8 : 4));
43164 /* Determine whether x86_output_mi_thunk can succeed. */
43166 static bool
43167 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
43168 const_tree function)
43170 /* 64-bit can handle anything. */
43171 if (TARGET_64BIT)
43172 return true;
43174 /* For 32-bit, everything's fine if we have one free register. */
43175 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
43176 return true;
43178 /* Need a free register for vcall_offset. */
43179 if (vcall_offset)
43180 return false;
43182 /* Need a free register for GOT references. */
43183 if (flag_pic && !targetm.binds_local_p (function))
43184 return false;
43186 /* Otherwise ok. */
43187 return true;
43190 /* Output the assembler code for a thunk function. THUNK_DECL is the
43191 declaration for the thunk function itself, FUNCTION is the decl for
43192 the target function. DELTA is an immediate constant offset to be
43193 added to THIS. If VCALL_OFFSET is nonzero, the word at
43194 *(*this + vcall_offset) should be added to THIS. */
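/* In C-like terms, the adjustment emitted below is roughly this sketch
   (not the actual RTL):

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     goto function;
*/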
43196 static void
43197 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
43198 HOST_WIDE_INT vcall_offset, tree function)
43200 rtx this_param = x86_this_parameter (function);
43201 rtx this_reg, tmp, fnaddr;
43202 unsigned int tmp_regno;
43203 rtx_insn *insn;
43205 if (TARGET_64BIT)
43206 tmp_regno = R10_REG;
43207 else
43209 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
43210 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43211 tmp_regno = AX_REG;
43212 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43213 tmp_regno = DX_REG;
43214 else
43215 tmp_regno = CX_REG;
43218 emit_note (NOTE_INSN_PROLOGUE_END);
43220 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
43221 pull it in now and let DELTA benefit. */
43222 if (REG_P (this_param))
43223 this_reg = this_param;
43224 else if (vcall_offset)
43226 /* Put the this parameter into %eax. */
43227 this_reg = gen_rtx_REG (Pmode, AX_REG);
43228 emit_move_insn (this_reg, this_param);
43230 else
43231 this_reg = NULL_RTX;
43233 /* Adjust the this parameter by a fixed constant. */
43234 if (delta)
43236 rtx delta_rtx = GEN_INT (delta);
43237 rtx delta_dst = this_reg ? this_reg : this_param;
43239 if (TARGET_64BIT)
43241 if (!x86_64_general_operand (delta_rtx, Pmode))
43243 tmp = gen_rtx_REG (Pmode, tmp_regno);
43244 emit_move_insn (tmp, delta_rtx);
43245 delta_rtx = tmp;
43249 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43252 /* Adjust the this parameter by a value stored in the vtable. */
43253 if (vcall_offset)
43255 rtx vcall_addr, vcall_mem, this_mem;
43257 tmp = gen_rtx_REG (Pmode, tmp_regno);
43259 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43260 if (Pmode != ptr_mode)
43261 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43262 emit_move_insn (tmp, this_mem);
43264 /* Adjust the this parameter. */
43265 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43266 if (TARGET_64BIT
43267 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43269 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43270 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43271 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43274 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43275 if (Pmode != ptr_mode)
43276 emit_insn (gen_addsi_1_zext (this_reg,
43277 gen_rtx_REG (ptr_mode,
43278 REGNO (this_reg)),
43279 vcall_mem));
43280 else
43281 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43284 /* If necessary, drop THIS back to its stack slot. */
43285 if (this_reg && this_reg != this_param)
43286 emit_move_insn (this_param, this_reg);
43288 fnaddr = XEXP (DECL_RTL (function), 0);
43289 if (TARGET_64BIT)
43291 if (!flag_pic || targetm.binds_local_p (function)
43292 || TARGET_PECOFF)
43294 else
43296 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43297 tmp = gen_rtx_CONST (Pmode, tmp);
43298 fnaddr = gen_const_mem (Pmode, tmp);
43301 else
43303 if (!flag_pic || targetm.binds_local_p (function))
43305 #if TARGET_MACHO
43306 else if (TARGET_MACHO)
43308 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43309 fnaddr = XEXP (fnaddr, 0);
43311 #endif /* TARGET_MACHO */
43312 else
43314 tmp = gen_rtx_REG (Pmode, CX_REG);
43315 output_set_got (tmp, NULL_RTX);
43317 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43318 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43319 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43320 fnaddr = gen_const_mem (Pmode, fnaddr);
43324 /* Our sibling call patterns do not allow memories, because we have no
43325 predicate that can distinguish between frame and non-frame memory.
43326 For our purposes here, we can get away with (ab)using a jump pattern,
43327 because we're going to do no optimization. */
43328 if (MEM_P (fnaddr))
43330 if (sibcall_insn_operand (fnaddr, word_mode))
43332 fnaddr = XEXP (DECL_RTL (function), 0);
43333 tmp = gen_rtx_MEM (QImode, fnaddr);
43334 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43335 tmp = emit_call_insn (tmp);
43336 SIBLING_CALL_P (tmp) = 1;
43338 else
43339 emit_jump_insn (gen_indirect_jump (fnaddr));
43341 else
43343 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43345 // CM_LARGE_PIC always uses pseudo PIC register which is
43346 // uninitialized. Since FUNCTION is local and calling it
43347 // doesn't go through PLT, we use scratch register %r11 as
43348 // PIC register and initialize it here.
43349 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43350 ix86_init_large_pic_reg (tmp_regno);
43351 fnaddr = legitimize_pic_address (fnaddr,
43352 gen_rtx_REG (Pmode, tmp_regno));
43355 if (!sibcall_insn_operand (fnaddr, word_mode))
43357 tmp = gen_rtx_REG (word_mode, tmp_regno);
43358 if (GET_MODE (fnaddr) != word_mode)
43359 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43360 emit_move_insn (tmp, fnaddr);
43361 fnaddr = tmp;
43364 tmp = gen_rtx_MEM (QImode, fnaddr);
43365 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43366 tmp = emit_call_insn (tmp);
43367 SIBLING_CALL_P (tmp) = 1;
43369 emit_barrier ();
43371 /* Emit just enough of rest_of_compilation to get the insns emitted.
43372 Note that use_thunk calls assemble_start_function et al. */
43373 insn = get_insns ();
43374 shorten_branches (insn);
43375 final_start_function (insn, file, 1);
43376 final (insn, file, 1);
43377 final_end_function ();
43380 static void
43381 x86_file_start (void)
43383 default_file_start ();
43384 if (TARGET_16BIT)
43385 fputs ("\t.code16gcc\n", asm_out_file);
43386 #if TARGET_MACHO
43387 darwin_file_start ();
43388 #endif
43389 if (X86_FILE_START_VERSION_DIRECTIVE)
43390 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43391 if (X86_FILE_START_FLTUSED)
43392 fputs ("\t.global\t__fltused\n", asm_out_file);
43393 if (ix86_asm_dialect == ASM_INTEL)
43394 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43397 int
43398 x86_field_alignment (tree field, int computed)
43400 machine_mode mode;
43401 tree type = TREE_TYPE (field);
43403 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43404 return computed;
43405 if (TARGET_IAMCU)
43406 return iamcu_alignment (type, computed);
43407 mode = TYPE_MODE (strip_array_types (type));
43408 if (mode == DFmode || mode == DCmode
43409 || GET_MODE_CLASS (mode) == MODE_INT
43410 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43411 return MIN (32, computed);
43412 return computed;
43415 /* Print call to TARGET to FILE. */
43417 static void
43418 x86_print_call_or_nop (FILE *file, const char *target)
43420 if (flag_nop_mcount)
43421 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43422 else
43423 fprintf (file, "1:\tcall\t%s\n", target);
43426 /* Output assembler code to FILE to increment profiler label # LABELNO
43427 for profiling a function entry. */
43428 void
43429 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43431 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43432 : MCOUNT_NAME);
43433 if (TARGET_64BIT)
43435 #ifndef NO_PROFILE_COUNTERS
43436 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43437 #endif
43439 if (!TARGET_PECOFF && flag_pic)
43440 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43441 else
43442 x86_print_call_or_nop (file, mcount_name);
43444 else if (flag_pic)
43446 #ifndef NO_PROFILE_COUNTERS
43447 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43448 LPREFIX, labelno);
43449 #endif
43450 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43452 else
43454 #ifndef NO_PROFILE_COUNTERS
43455 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43456 LPREFIX, labelno);
43457 #endif
43458 x86_print_call_or_nop (file, mcount_name);
43461 if (flag_record_mcount)
43463 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43464 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43465 fprintf (file, "\t.previous\n");
43469 /* We don't have exact information about the insn sizes, but we may assume
43470 quite safely that we are informed about all 1-byte insns and memory
43471 address sizes. This is enough to eliminate unnecessary padding in
43472 99% of cases. */
43474 static int
43475 min_insn_size (rtx_insn *insn)
43477 int l = 0, len;
43479 if (!INSN_P (insn) || !active_insn_p (insn))
43480 return 0;
43482 /* Discard alignments we've emitted, and jump instructions. */
43483 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43484 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43485 return 0;
43487 /* Important case: calls are always 5 bytes.
43488 It is common to have many calls in a row. */
43489 if (CALL_P (insn)
43490 && symbolic_reference_mentioned_p (PATTERN (insn))
43491 && !SIBLING_CALL_P (insn))
43492 return 5;
43493 len = get_attr_length (insn);
43494 if (len <= 1)
43495 return 1;
43497 /* For normal instructions we rely on get_attr_length being exact,
43498 with a few exceptions. */
43499 if (!JUMP_P (insn))
43501 enum attr_type type = get_attr_type (insn);
43503 switch (type)
43505 case TYPE_MULTI:
43506 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43507 || asm_noperands (PATTERN (insn)) >= 0)
43508 return 0;
43509 break;
43510 case TYPE_OTHER:
43511 case TYPE_FCMP:
43512 break;
43513 default:
43514 /* Otherwise trust get_attr_length. */
43515 return len;
43518 l = get_attr_length_address (insn);
43519 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43520 l = 4;
43522 if (l)
43523 return 1+l;
43524 else
43525 return 2;
43528 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43530 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps
43531 in a 16-byte window. */
43533 static void
43534 ix86_avoid_jump_mispredicts (void)
43536 rtx_insn *insn, *start = get_insns ();
43537 int nbytes = 0, njumps = 0;
43538 bool isjump = false;
43540 /* Look for all minimal intervals of instructions containing 4 jumps.
43541 The intervals are bounded by START and INSN. NBYTES is the total
43542 size of the instructions in the interval, including INSN and not
43543 including START. When NBYTES is smaller than 16 bytes, it is
43544 possible that the ends of START and INSN land in the same 16-byte page.
43546 The smallest offset at which INSN can start in that page is the case
43547 where START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
43548 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
43550 Don't consider an asm goto as a jump: while it can contain a jump, it
43551 doesn't have to, since control transfer to the label(s) can happen by
43552 other means; also, we estimate the minimum length of all asm stmts as 0. */
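/* A worked example of the padding below: if a fourth jump would fall
   within 16 bytes of three earlier ones, say with nbytes == 12 and
   min_insn_size (insn) == 2, we emit gen_pad for 15 - 12 + 2 = 5
   bytes so the jumps cannot share one 16-byte window.  */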
43553 for (insn = start; insn; insn = NEXT_INSN (insn))
43555 int min_size;
43557 if (LABEL_P (insn))
43559 int align = label_to_alignment (insn);
43560 int max_skip = label_to_max_skip (insn);
43562 if (max_skip > 15)
43563 max_skip = 15;
43564 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43565 already in the current 16 byte page, because otherwise
43566 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43567 bytes to reach 16 byte boundary. */
43568 if (align <= 0
43569 || (align <= 3 && max_skip != (1 << align) - 1))
43570 max_skip = 0;
43571 if (dump_file)
43572 fprintf (dump_file, "Label %i with max_skip %i\n",
43573 INSN_UID (insn), max_skip);
43574 if (max_skip)
43576 while (nbytes + max_skip >= 16)
43578 start = NEXT_INSN (start);
43579 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43580 || CALL_P (start))
43581 njumps--, isjump = true;
43582 else
43583 isjump = false;
43584 nbytes -= min_insn_size (start);
43587 continue;
43590 min_size = min_insn_size (insn);
43591 nbytes += min_size;
43592 if (dump_file)
43593 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43594 INSN_UID (insn), min_size);
43595 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43596 || CALL_P (insn))
43597 njumps++;
43598 else
43599 continue;
43601 while (njumps > 3)
43603 start = NEXT_INSN (start);
43604 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43605 || CALL_P (start))
43606 njumps--, isjump = true;
43607 else
43608 isjump = false;
43609 nbytes -= min_insn_size (start);
43611 gcc_assert (njumps >= 0);
43612 if (dump_file)
43613 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43614 INSN_UID (start), INSN_UID (insn), nbytes);
43616 if (njumps == 3 && isjump && nbytes < 16)
43618 int padsize = 15 - nbytes + min_insn_size (insn);
43620 if (dump_file)
43621 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43622 INSN_UID (insn), padsize);
43623 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43627 #endif
43629 /* AMD Athlon works faster
43630 when RET is not the destination of a conditional jump or directly
43631 preceded by another jump instruction. We avoid the penalty by
43632 inserting a NOP just before the RET instruction in such cases. */
43633 static void
43634 ix86_pad_returns (void)
43636 edge e;
43637 edge_iterator ei;
43639 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43641 basic_block bb = e->src;
43642 rtx_insn *ret = BB_END (bb);
43643 rtx_insn *prev;
43644 bool replace = false;
43646 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43647 || optimize_bb_for_size_p (bb))
43648 continue;
43649 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43650 if (active_insn_p (prev) || LABEL_P (prev))
43651 break;
43652 if (prev && LABEL_P (prev))
43654 edge e;
43655 edge_iterator ei;
43657 FOR_EACH_EDGE (e, ei, bb->preds)
43658 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43659 && !(e->flags & EDGE_FALLTHRU))
43661 replace = true;
43662 break;
43665 if (!replace)
43667 prev = prev_active_insn (ret);
43668 if (prev
43669 && ((JUMP_P (prev) && any_condjump_p (prev))
43670 || CALL_P (prev)))
43671 replace = true;
43672 /* Empty functions get a branch mispredict even when
43673 the jump destination is not visible to us. */
43674 if (!prev && !optimize_function_for_size_p (cfun))
43675 replace = true;
43677 if (replace)
43679 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43680 delete_insn (ret);
43685 /* Count the minimum number of instructions in BB. Return 4 if the
43686 number of instructions >= 4. */
43688 static int
43689 ix86_count_insn_bb (basic_block bb)
43691 rtx_insn *insn;
43692 int insn_count = 0;
43694 /* Count number of instructions in this block. Return 4 if the number
43695 of instructions >= 4. */
43696 FOR_BB_INSNS (bb, insn)
43698 /* This only happens in exit blocks. */
43699 if (JUMP_P (insn)
43700 && ANY_RETURN_P (PATTERN (insn)))
43701 break;
43703 if (NONDEBUG_INSN_P (insn)
43704 && GET_CODE (PATTERN (insn)) != USE
43705 && GET_CODE (PATTERN (insn)) != CLOBBER)
43707 insn_count++;
43708 if (insn_count >= 4)
43709 return insn_count;
43713 return insn_count;
43717 /* Count the minimum number of instructions in the code path through
43718 BB. Return 4 if the number of instructions >= 4. */
43720 static int
43721 ix86_count_insn (basic_block bb)
43723 edge e;
43724 edge_iterator ei;
43725 int min_prev_count;
43727 /* Only bother counting instructions along paths with no
43728 more than 2 basic blocks between entry and exit. Given
43729 that BB has an edge to exit, determine if a predecessor
43730 of BB has an edge from entry. If so, compute the number
43731 of instructions in the predecessor block. If there
43732 happen to be multiple such blocks, compute the minimum. */
43733 min_prev_count = 4;
43734 FOR_EACH_EDGE (e, ei, bb->preds)
43736 edge prev_e;
43737 edge_iterator prev_ei;
43739 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43741 min_prev_count = 0;
43742 break;
43744 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43746 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43748 int count = ix86_count_insn_bb (e->src);
43749 if (count < min_prev_count)
43750 min_prev_count = count;
43751 break;
43756 if (min_prev_count < 4)
43757 min_prev_count += ix86_count_insn_bb (bb);
43759 return min_prev_count;
43762 /* Pad short function to 4 instructions. */
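/* For example, a function whose counted entry path has just one
   instruction gets 2 * (4 - 1) = 6 NOPs emitted before its epilogue;
   since two NOPs count as one instruction, that supplies the three
   missing instructions.  */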
43764 static void
43765 ix86_pad_short_function (void)
43767 edge e;
43768 edge_iterator ei;
43770 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43772 rtx_insn *ret = BB_END (e->src);
43773 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43775 int insn_count = ix86_count_insn (e->src);
43777 /* Pad short function. */
43778 if (insn_count < 4)
43780 rtx_insn *insn = ret;
43782 /* Find epilogue. */
43783 while (insn
43784 && (!NOTE_P (insn)
43785 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43786 insn = PREV_INSN (insn);
43788 if (!insn)
43789 insn = ret;
43791 /* Two NOPs count as one instruction. */
43792 insn_count = 2 * (4 - insn_count);
43793 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43799 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43800 the epilogue, the Windows system unwinder will apply epilogue logic and
43801 produce incorrect offsets. This can be avoided by adding a nop between
43802 the last insn that can throw and the first insn of the epilogue. */
43804 static void
43805 ix86_seh_fixup_eh_fallthru (void)
43807 edge e;
43808 edge_iterator ei;
43810 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43812 rtx_insn *insn, *next;
43814 /* Find the beginning of the epilogue. */
43815 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43816 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43817 break;
43818 if (insn == NULL)
43819 continue;
43821 /* We only care about preceding insns that can throw. */
43822 insn = prev_active_insn (insn);
43823 if (insn == NULL || !can_throw_internal (insn))
43824 continue;
43826 /* Do not separate calls from their debug information. */
43827 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43828 if (NOTE_P (next)
43829 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43830 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43831 insn = next;
43832 else
43833 break;
43835 emit_insn_after (gen_nops (const1_rtx), insn);
43839 /* Implement machine-specific optimizations. We implement padding of
43840 returns for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43841 static void
43842 ix86_reorg (void)
43844 /* We are freeing block_for_insn in the toplev to keep compatibility
43845 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43846 compute_bb_for_insn ();
43848 if (TARGET_SEH && current_function_has_exception_handlers ())
43849 ix86_seh_fixup_eh_fallthru ();
43851 if (optimize && optimize_function_for_speed_p (cfun))
43853 if (TARGET_PAD_SHORT_FUNCTION)
43854 ix86_pad_short_function ();
43855 else if (TARGET_PAD_RETURNS)
43856 ix86_pad_returns ();
43857 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43858 if (TARGET_FOUR_JUMP_LIMIT)
43859 ix86_avoid_jump_mispredicts ();
43860 #endif
43864 /* Return nonzero when a QImode register that must be represented via
43865 a REX prefix is used. */
43866 bool
43867 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43869 int i;
43870 extract_insn_cached (insn);
43871 for (i = 0; i < recog_data.n_operands; i++)
43872 if (GENERAL_REG_P (recog_data.operand[i])
43873 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43874 return true;
43875 return false;
43878 /* Return true when INSN mentions a register that must be encoded
43879 using a REX prefix. */
43880 bool
43881 x86_extended_reg_mentioned_p (rtx insn)
43883 subrtx_iterator::array_type array;
43884 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43886 const_rtx x = *iter;
43887 if (REG_P (x)
43888 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43889 return true;
43891 return false;
43894 /* If profitable, negate (without causing overflow) integer constant
43895 of mode MODE at location LOC. Return true in this case. */
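/* For example, (plus:SI x (const_int -4)) has its constant negated so
   the insn can be emitted as `subl $4,%eax' instead of `addl $-4,%eax';
   128 is likewise negated to -128, whose 8-bit immediate encodes
   shorter.  */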
43896 bool
43897 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43899 HOST_WIDE_INT val;
43901 if (!CONST_INT_P (*loc))
43902 return false;
43904 switch (mode)
43906 case DImode:
43907 /* DImode x86_64 constants must fit in 32 bits. */
43908 gcc_assert (x86_64_immediate_operand (*loc, mode));
43910 mode = SImode;
43911 break;
43913 case SImode:
43914 case HImode:
43915 case QImode:
43916 break;
43918 default:
43919 gcc_unreachable ();
43922 /* Avoid overflows. */
43923 if (mode_signbit_p (mode, *loc))
43924 return false;
43926 val = INTVAL (*loc);
43928 /* Make things pretty: `subl $4,%eax' rather than `addl $-4,%eax'.
43929 Exception: -128 encodes smaller than 128, so swap the sign and the op. */
43930 if ((val < 0 && val != -128)
43931 || val == 128)
43933 *loc = GEN_INT (-val);
43934 return true;
43937 return false;
43940 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43941 optabs would emit if we didn't have TFmode patterns. */
43943 void
43944 x86_emit_floatuns (rtx operands[2])
43946 rtx_code_label *neglab, *donelab;
43947 rtx i0, i1, f0, in, out;
43948 machine_mode mode, inmode;
43950 inmode = GET_MODE (operands[1]);
43951 gcc_assert (inmode == SImode || inmode == DImode);
43953 out = operands[0];
43954 in = force_reg (inmode, operands[1]);
43955 mode = GET_MODE (out);
43956 neglab = gen_label_rtx ();
43957 donelab = gen_label_rtx ();
43958 f0 = gen_reg_rtx (mode);
43960 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43962 expand_float (out, in, 0);
43964 emit_jump_insn (gen_jump (donelab));
43965 emit_barrier ();
43967 emit_label (neglab);
43969 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43970 1, OPTAB_DIRECT);
43971 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43972 1, OPTAB_DIRECT);
43973 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43975 expand_float (f0, i0, 0);
43977 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
43979 emit_label (donelab);
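/* Illustrative sketch (editorial, not part of the port): the same
   unsigned-to-float trick in plain C for a 64-bit input.  `u64_to_double'
   is a hypothetical name used only for this example.  */

#include <stdint.h>

static double
u64_to_double (uint64_t in)
{
  /* High bit clear: an ordinary signed conversion suffices.  */
  if ((int64_t) in >= 0)
    return (double) (int64_t) in;

  /* High bit set: halve the value, folding the discarded low bit back in
     so rounding still sees it, convert as signed, then double.  */
  uint64_t i0 = (in >> 1) | (in & 1);
  double f0 = (double) (int64_t) i0;
  return f0 + f0;
}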
43982 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43983 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43984 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43985 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43987 /* Get a vector mode of the same size as the original but with elements
43988 twice as wide. This is only guaranteed to apply to integral vectors. */
43990 static inline machine_mode
43991 get_mode_wider_vector (machine_mode o)
43993 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43994 machine_mode n = GET_MODE_WIDER_MODE (o);
43995 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43996 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43997 return n;
44000 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
44001 fill target with val via vec_duplicate. */
44003 static bool
44004 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
44006 bool ok;
44007 rtx_insn *insn;
44008 rtx dup;
44010 /* First attempt to recognize VAL as-is. */
44011 dup = gen_rtx_VEC_DUPLICATE (mode, val);
44012 insn = emit_insn (gen_rtx_SET (target, dup));
44013 if (recog_memoized (insn) < 0)
44015 rtx_insn *seq;
44016 /* If that fails, force VAL into a register. */
44018 start_sequence ();
44019 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
44020 seq = get_insns ();
44021 end_sequence ();
44022 if (seq)
44023 emit_insn_before (seq, insn);
44025 ok = recog_memoized (insn) >= 0;
44026 gcc_assert (ok);
44028 return true;
44031 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44032 with all elements equal to VAR. Return true if successful. */
44034 static bool
44035 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
44036 rtx target, rtx val)
44038 bool ok;
44040 switch (mode)
44042 case V2SImode:
44043 case V2SFmode:
44044 if (!mmx_ok)
44045 return false;
44046 /* FALLTHRU */
44048 case V4DFmode:
44049 case V4DImode:
44050 case V8SFmode:
44051 case V8SImode:
44052 case V2DFmode:
44053 case V2DImode:
44054 case V4SFmode:
44055 case V4SImode:
44056 case V16SImode:
44057 case V8DImode:
44058 case V16SFmode:
44059 case V8DFmode:
44060 return ix86_vector_duplicate_value (mode, target, val);
44062 case V4HImode:
44063 if (!mmx_ok)
44064 return false;
44065 if (TARGET_SSE || TARGET_3DNOW_A)
44067 rtx x;
44069 val = gen_lowpart (SImode, val);
44070 x = gen_rtx_TRUNCATE (HImode, val);
44071 x = gen_rtx_VEC_DUPLICATE (mode, x);
44072 emit_insn (gen_rtx_SET (target, x));
44073 return true;
44075 goto widen;
44077 case V8QImode:
44078 if (!mmx_ok)
44079 return false;
44080 goto widen;
44082 case V8HImode:
44083 if (TARGET_AVX2)
44084 return ix86_vector_duplicate_value (mode, target, val);
44086 if (TARGET_SSE2)
44088 struct expand_vec_perm_d dperm;
44089 rtx tmp1, tmp2;
44091 permute:
44092 memset (&dperm, 0, sizeof (dperm));
44093 dperm.target = target;
44094 dperm.vmode = mode;
44095 dperm.nelt = GET_MODE_NUNITS (mode);
44096 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
44097 dperm.one_operand_p = true;
44099 /* Extend to SImode using a paradoxical SUBREG. */
44100 tmp1 = gen_reg_rtx (SImode);
44101 emit_move_insn (tmp1, gen_lowpart (SImode, val));
44103 /* Insert the SImode value as low element of a V4SImode vector. */
44104 tmp2 = gen_reg_rtx (V4SImode);
44105 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
44106 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
44108 ok = (expand_vec_perm_1 (&dperm)
44109 || expand_vec_perm_broadcast_1 (&dperm));
44110 gcc_assert (ok);
44111 return ok;
44113 goto widen;
44115 case V16QImode:
44116 if (TARGET_AVX2)
44117 return ix86_vector_duplicate_value (mode, target, val);
44119 if (TARGET_SSE2)
44120 goto permute;
44121 goto widen;
44123 widen:
44124 /* Replicate the value once into the next wider mode and recurse. */
44126 machine_mode smode, wsmode, wvmode;
44127 rtx x;
44129 smode = GET_MODE_INNER (mode);
44130 wvmode = get_mode_wider_vector (mode);
44131 wsmode = GET_MODE_INNER (wvmode);
44133 val = convert_modes (wsmode, smode, val, true);
44134 x = expand_simple_binop (wsmode, ASHIFT, val,
44135 GEN_INT (GET_MODE_BITSIZE (smode)),
44136 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44137 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
44139 x = gen_reg_rtx (wvmode);
44140 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
44141 gcc_assert (ok);
44142 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
44143 return ok;
44146 case V16HImode:
44147 case V32QImode:
44148 if (TARGET_AVX2)
44149 return ix86_vector_duplicate_value (mode, target, val);
44150 else
44152 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
44153 rtx x = gen_reg_rtx (hvmode);
44155 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44156 gcc_assert (ok);
44158 x = gen_rtx_VEC_CONCAT (mode, x, x);
44159 emit_insn (gen_rtx_SET (target, x));
44161 return true;
44163 case V64QImode:
44164 case V32HImode:
44165 if (TARGET_AVX512BW)
44166 return ix86_vector_duplicate_value (mode, target, val);
44167 else
44169 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
44170 rtx x = gen_reg_rtx (hvmode);
44172 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44173 gcc_assert (ok);
44175 x = gen_rtx_VEC_CONCAT (mode, x, x);
44176 emit_insn (gen_rtx_SET (target, x));
44178 return true;
44180 default:
44181 return false;
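/* Illustrative sketch (editorial, not part of the port): the `widen'
   step above on plain integers -- replicate a byte into a 16-bit word,
   then that word into a 32-bit word, so that a broadcast in the wider
   mode can finish the job.  `replicate_u8_to_u32' is a hypothetical
   name used only for this example.  */

#include <stdint.h>

static uint32_t
replicate_u8_to_u32 (uint8_t v)
{
  /* val = (val << bits) | val, once per widening step.  */
  uint16_t w = (uint16_t) (((uint16_t) v << 8) | v);
  return ((uint32_t) w << 16) | w;
}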
44185 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44186 whose ONE_VAR element is VAR, and other elements are zero. Return true
44187 if successful. */
44189 static bool
44190 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
44191 rtx target, rtx var, int one_var)
44193 machine_mode vsimode;
44194 rtx new_target;
44195 rtx x, tmp;
44196 bool use_vector_set = false;
44198 switch (mode)
44200 case V2DImode:
44201 /* For SSE4.1, we normally use vector set. But if the second
44202 element is zero and inter-unit moves are OK, we use movq
44203 instead. */
44204 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
44205 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
44206 && one_var == 0));
44207 break;
44208 case V16QImode:
44209 case V4SImode:
44210 case V4SFmode:
44211 use_vector_set = TARGET_SSE4_1;
44212 break;
44213 case V8HImode:
44214 use_vector_set = TARGET_SSE2;
44215 break;
44216 case V4HImode:
44217 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
44218 break;
44219 case V32QImode:
44220 case V16HImode:
44221 case V8SImode:
44222 case V8SFmode:
44223 case V4DFmode:
44224 use_vector_set = TARGET_AVX;
44225 break;
44226 case V4DImode:
44227 /* Use ix86_expand_vector_set in 64bit mode only. */
44228 use_vector_set = TARGET_AVX && TARGET_64BIT;
44229 break;
44230 default:
44231 break;
44234 if (use_vector_set)
44236 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
44237 var = force_reg (GET_MODE_INNER (mode), var);
44238 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44239 return true;
44242 switch (mode)
44244 case V2SFmode:
44245 case V2SImode:
44246 if (!mmx_ok)
44247 return false;
44248 /* FALLTHRU */
44250 case V2DFmode:
44251 case V2DImode:
44252 if (one_var != 0)
44253 return false;
44254 var = force_reg (GET_MODE_INNER (mode), var);
44255 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44256 emit_insn (gen_rtx_SET (target, x));
44257 return true;
44259 case V4SFmode:
44260 case V4SImode:
44261 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44262 new_target = gen_reg_rtx (mode);
44263 else
44264 new_target = target;
44265 var = force_reg (GET_MODE_INNER (mode), var);
44266 x = gen_rtx_VEC_DUPLICATE (mode, var);
44267 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44268 emit_insn (gen_rtx_SET (new_target, x));
44269 if (one_var != 0)
44271 /* We need to shuffle the value to the correct position, so
44272 create a new pseudo to store the intermediate result. */
44274 /* With SSE2, we can use the integer shuffle insns. */
44275 if (mode != V4SFmode && TARGET_SSE2)
44277 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44278 const1_rtx,
44279 GEN_INT (one_var == 1 ? 0 : 1),
44280 GEN_INT (one_var == 2 ? 0 : 1),
44281 GEN_INT (one_var == 3 ? 0 : 1)));
44282 if (target != new_target)
44283 emit_move_insn (target, new_target);
44284 return true;
44287 /* Otherwise convert the intermediate result to V4SFmode and
44288 use the SSE1 shuffle instructions. */
44289 if (mode != V4SFmode)
44291 tmp = gen_reg_rtx (V4SFmode);
44292 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44294 else
44295 tmp = new_target;
44297 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44298 const1_rtx,
44299 GEN_INT (one_var == 1 ? 0 : 1),
44300 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44301 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44303 if (mode != V4SFmode)
44304 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44305 else if (tmp != target)
44306 emit_move_insn (target, tmp);
44308 else if (target != new_target)
44309 emit_move_insn (target, new_target);
44310 return true;
44312 case V8HImode:
44313 case V16QImode:
44314 vsimode = V4SImode;
44315 goto widen;
44316 case V4HImode:
44317 case V8QImode:
44318 if (!mmx_ok)
44319 return false;
44320 vsimode = V2SImode;
44321 goto widen;
44322 widen:
44323 if (one_var != 0)
44324 return false;
44326 /* Zero extend the variable element to SImode and recurse. */
44327 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44329 x = gen_reg_rtx (vsimode);
44330 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44331 var, one_var))
44332 gcc_unreachable ();
44334 emit_move_insn (target, gen_lowpart (mode, x));
44335 return true;
44337 default:
44338 return false;
44342 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44343 consisting of the values in VALS. It is known that all elements
44344 except ONE_VAR are constants. Return true if successful. */
44346 static bool
44347 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44348 rtx target, rtx vals, int one_var)
44350 rtx var = XVECEXP (vals, 0, one_var);
44351 machine_mode wmode;
44352 rtx const_vec, x;
44354 const_vec = copy_rtx (vals);
44355 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44356 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44358 switch (mode)
44360 case V2DFmode:
44361 case V2DImode:
44362 case V2SFmode:
44363 case V2SImode:
44364 /* For the two element vectors, it's just as easy to use
44365 the general case. */
44366 return false;
44368 case V4DImode:
44369 /* Use ix86_expand_vector_set in 64bit mode only. */
44370 if (!TARGET_64BIT)
44371 return false;
44372 case V4DFmode:
44373 case V8SFmode:
44374 case V8SImode:
44375 case V16HImode:
44376 case V32QImode:
44377 case V4SFmode:
44378 case V4SImode:
44379 case V8HImode:
44380 case V4HImode:
44381 break;
44383 case V16QImode:
44384 if (TARGET_SSE4_1)
44385 break;
44386 wmode = V8HImode;
44387 goto widen;
44388 case V8QImode:
44389 wmode = V4HImode;
44390 goto widen;
44391 widen:
44392 /* There's no way to set one QImode entry easily. Combine
44393 the variable value with its adjacent constant value, and
44394 promote to an HImode set. */
44395 x = XVECEXP (vals, 0, one_var ^ 1);
44396 if (one_var & 1)
44398 var = convert_modes (HImode, QImode, var, true);
44399 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44400 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44401 x = GEN_INT (INTVAL (x) & 0xff);
44403 else
44405 var = convert_modes (HImode, QImode, var, true);
44406 x = gen_int_mode (INTVAL (x) << 8, HImode);
44408 if (x != const0_rtx)
44409 var = expand_simple_binop (HImode, IOR, var, x, var,
44410 1, OPTAB_LIB_WIDEN);
44412 x = gen_reg_rtx (wmode);
44413 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44414 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44416 emit_move_insn (target, gen_lowpart (mode, x));
44417 return true;
44419 default:
44420 return false;
44423 emit_move_insn (target, const_vec);
44424 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44425 return true;
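/* Illustrative sketch (editorial, not part of the port): the QImode
   `widen' step above -- fuse the variable byte with its neighbouring
   constant byte into one 16-bit lane, honouring which member of the
   pair is variable.  `fuse_qi_pair' is a hypothetical name used only
   for this example.  */

#include <stdint.h>

static uint16_t
fuse_qi_pair (uint8_t var, uint8_t adjacent_const, int var_is_odd)
{
  if (var_is_odd)
    /* The odd element occupies the high half of the HImode lane.  */
    return (uint16_t) (((uint16_t) var << 8) | adjacent_const);
  /* The even element occupies the low half.  */
  return (uint16_t) (((uint16_t) adjacent_const << 8) | var);
}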
44428 /* A subroutine of ix86_expand_vector_init_general. Use vector
44429 concatenate to handle the most general case: all values variable,
44430 and none identical. */
44432 static void
44433 ix86_expand_vector_init_concat (machine_mode mode,
44434 rtx target, rtx *ops, int n)
44436 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44437 rtx first[16], second[8], third[4];
44438 rtvec v;
44439 int i, j;
44441 switch (n)
44443 case 2:
44444 switch (mode)
44446 case V16SImode:
44447 cmode = V8SImode;
44448 break;
44449 case V16SFmode:
44450 cmode = V8SFmode;
44451 break;
44452 case V8DImode:
44453 cmode = V4DImode;
44454 break;
44455 case V8DFmode:
44456 cmode = V4DFmode;
44457 break;
44458 case V8SImode:
44459 cmode = V4SImode;
44460 break;
44461 case V8SFmode:
44462 cmode = V4SFmode;
44463 break;
44464 case V4DImode:
44465 cmode = V2DImode;
44466 break;
44467 case V4DFmode:
44468 cmode = V2DFmode;
44469 break;
44470 case V4SImode:
44471 cmode = V2SImode;
44472 break;
44473 case V4SFmode:
44474 cmode = V2SFmode;
44475 break;
44476 case V2DImode:
44477 cmode = DImode;
44478 break;
44479 case V2SImode:
44480 cmode = SImode;
44481 break;
44482 case V2DFmode:
44483 cmode = DFmode;
44484 break;
44485 case V2SFmode:
44486 cmode = SFmode;
44487 break;
44488 default:
44489 gcc_unreachable ();
44492 if (!register_operand (ops[1], cmode))
44493 ops[1] = force_reg (cmode, ops[1]);
44494 if (!register_operand (ops[0], cmode))
44495 ops[0] = force_reg (cmode, ops[0]);
44496 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
44497 ops[1])));
44498 break;
44500 case 4:
44501 switch (mode)
44503 case V4DImode:
44504 cmode = V2DImode;
44505 break;
44506 case V4DFmode:
44507 cmode = V2DFmode;
44508 break;
44509 case V4SImode:
44510 cmode = V2SImode;
44511 break;
44512 case V4SFmode:
44513 cmode = V2SFmode;
44514 break;
44515 default:
44516 gcc_unreachable ();
44518 goto half;
44520 case 8:
44521 switch (mode)
44523 case V8DImode:
44524 cmode = V2DImode;
44525 hmode = V4DImode;
44526 break;
44527 case V8DFmode:
44528 cmode = V2DFmode;
44529 hmode = V4DFmode;
44530 break;
44531 case V8SImode:
44532 cmode = V2SImode;
44533 hmode = V4SImode;
44534 break;
44535 case V8SFmode:
44536 cmode = V2SFmode;
44537 hmode = V4SFmode;
44538 break;
44539 default:
44540 gcc_unreachable ();
44542 goto half;
44544 case 16:
44545 switch (mode)
44547 case V16SImode:
44548 cmode = V2SImode;
44549 hmode = V4SImode;
44550 gmode = V8SImode;
44551 break;
44552 case V16SFmode:
44553 cmode = V2SFmode;
44554 hmode = V4SFmode;
44555 gmode = V8SFmode;
44556 break;
44557 default:
44558 gcc_unreachable ();
44560 goto half;
44562 half:
44563 /* FIXME: We process inputs backward to help RA. PR 36222. */
44564 i = n - 1;
44565 j = (n >> 1) - 1;
44566 for (; i > 0; i -= 2, j--)
44568 first[j] = gen_reg_rtx (cmode);
44569 v = gen_rtvec (2, ops[i - 1], ops[i]);
44570 ix86_expand_vector_init (false, first[j],
44571 gen_rtx_PARALLEL (cmode, v));
44574 n >>= 1;
44575 if (n > 4)
44577 gcc_assert (hmode != VOIDmode);
44578 gcc_assert (gmode != VOIDmode);
44579 for (i = j = 0; i < n; i += 2, j++)
44581 second[j] = gen_reg_rtx (hmode);
44582 ix86_expand_vector_init_concat (hmode, second [j],
44583 &first [i], 2);
44585 n >>= 1;
44586 for (i = j = 0; i < n; i += 2, j++)
44588 third[j] = gen_reg_rtx (gmode);
44589 ix86_expand_vector_init_concat (gmode, third[j],
44590 &second[i], 2);
44592 n >>= 1;
44593 ix86_expand_vector_init_concat (mode, target, third, n);
44595 else if (n > 2)
44597 gcc_assert (hmode != VOIDmode);
44598 for (i = j = 0; i < n; i += 2, j++)
44600 second[j] = gen_reg_rtx (hmode);
44601 ix86_expand_vector_init_concat (hmode, second [j],
44602 &first [i], 2);
44604 n >>= 1;
44605 ix86_expand_vector_init_concat (mode, target, second, n);
44607 else
44608 ix86_expand_vector_init_concat (mode, target, first, n);
44609 break;
44611 default:
44612 gcc_unreachable ();
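/* Illustrative sketch (editorial, not part of the port): the shape of
   the recursion above -- pair adjacent inputs, then pair the pairs,
   until one value remains.  The port pairs its inputs back-to-front
   into fresh pseudos to help the register allocator (PR 36222); this
   in-place forward version only shows the tree structure.
   `concat_tree' is a hypothetical name used only for this example.  */

#include <stdint.h>

static uint64_t
concat_tree (uint64_t *ops, int n, int elt_bits)
{
  /* N must be a power of two and N * ELT_BITS must not exceed 64.
     Each pass fuses adjacent operands, doubling the element width,
     with the lower-numbered operand in the lower bits.  */
  while (n > 1)
    {
      for (int i = 0, j = 0; i < n; i += 2, j++)
        ops[j] = ops[i] | (ops[i + 1] << elt_bits);
      elt_bits *= 2;
      n >>= 1;
    }
  return ops[0];
}

/* E.g., with v[4] = { 0x11, 0x22, 0x33, 0x44 },
   concat_tree (v, 4, 8) yields 0x44332211.  */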
44616 /* A subroutine of ix86_expand_vector_init_general. Use vector
44617 interleave to handle the most general case: all values variable,
44618 and none identical. */
44620 static void
44621 ix86_expand_vector_init_interleave (machine_mode mode,
44622 rtx target, rtx *ops, int n)
44624 machine_mode first_imode, second_imode, third_imode, inner_mode;
44625 int i, j;
44626 rtx op0, op1;
44627 rtx (*gen_load_even) (rtx, rtx, rtx);
44628 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44629 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44631 switch (mode)
44633 case V8HImode:
44634 gen_load_even = gen_vec_setv8hi;
44635 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44636 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44637 inner_mode = HImode;
44638 first_imode = V4SImode;
44639 second_imode = V2DImode;
44640 third_imode = VOIDmode;
44641 break;
44642 case V16QImode:
44643 gen_load_even = gen_vec_setv16qi;
44644 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44645 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44646 inner_mode = QImode;
44647 first_imode = V8HImode;
44648 second_imode = V4SImode;
44649 third_imode = V2DImode;
44650 break;
44651 default:
44652 gcc_unreachable ();
44655 for (i = 0; i < n; i++)
44657 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44658 op0 = gen_reg_rtx (SImode);
44659 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44661 /* Insert the SImode value as low element of V4SImode vector. */
44662 op1 = gen_reg_rtx (V4SImode);
44663 op0 = gen_rtx_VEC_MERGE (V4SImode,
44664 gen_rtx_VEC_DUPLICATE (V4SImode,
44665 op0),
44666 CONST0_RTX (V4SImode),
44667 const1_rtx);
44668 emit_insn (gen_rtx_SET (op1, op0));
44670 /* Cast the V4SImode vector back to a vector in the original mode. */
44671 op0 = gen_reg_rtx (mode);
44672 emit_move_insn (op0, gen_lowpart (mode, op1));
44674 /* Load even elements into the second position. */
44675 emit_insn (gen_load_even (op0,
44676 force_reg (inner_mode,
44677 ops [i + i + 1]),
44678 const1_rtx));
44680 /* Cast vector to FIRST_IMODE vector. */
44681 ops[i] = gen_reg_rtx (first_imode);
44682 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44685 /* Interleave low FIRST_IMODE vectors. */
44686 for (i = j = 0; i < n; i += 2, j++)
44688 op0 = gen_reg_rtx (first_imode);
44689 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44691 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44692 ops[j] = gen_reg_rtx (second_imode);
44693 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44696 /* Interleave low SECOND_IMODE vectors. */
44697 switch (second_imode)
44699 case V4SImode:
44700 for (i = j = 0; i < n / 2; i += 2, j++)
44702 op0 = gen_reg_rtx (second_imode);
44703 emit_insn (gen_interleave_second_low (op0, ops[i],
44704 ops[i + 1]));
44706 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44707 vector. */
44708 ops[j] = gen_reg_rtx (third_imode);
44709 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44711 second_imode = V2DImode;
44712 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44713 /* FALLTHRU */
44715 case V2DImode:
44716 op0 = gen_reg_rtx (second_imode);
44717 emit_insn (gen_interleave_second_low (op0, ops[0],
44718 ops[1]));
44720 /* Cast the SECOND_IMODE vector back to a vector in the original
44721 mode. */
44722 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
44723 break;
44725 default:
44726 gcc_unreachable ();
44730 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44731 all values variable, and none identical. */
44733 static void
44734 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44735 rtx target, rtx vals)
44737 rtx ops[64], op0, op1, op2, op3, op4, op5;
44738 machine_mode half_mode = VOIDmode;
44739 machine_mode quarter_mode = VOIDmode;
44740 int n, i;
44742 switch (mode)
44744 case V2SFmode:
44745 case V2SImode:
44746 if (!mmx_ok && !TARGET_SSE)
44747 break;
44748 /* FALLTHRU */
44750 case V16SImode:
44751 case V16SFmode:
44752 case V8DFmode:
44753 case V8DImode:
44754 case V8SFmode:
44755 case V8SImode:
44756 case V4DFmode:
44757 case V4DImode:
44758 case V4SFmode:
44759 case V4SImode:
44760 case V2DFmode:
44761 case V2DImode:
44762 n = GET_MODE_NUNITS (mode);
44763 for (i = 0; i < n; i++)
44764 ops[i] = XVECEXP (vals, 0, i);
44765 ix86_expand_vector_init_concat (mode, target, ops, n);
44766 return;
44768 case V32QImode:
44769 half_mode = V16QImode;
44770 goto half;
44772 case V16HImode:
44773 half_mode = V8HImode;
44774 goto half;
44776 half:
44777 n = GET_MODE_NUNITS (mode);
44778 for (i = 0; i < n; i++)
44779 ops[i] = XVECEXP (vals, 0, i);
44780 op0 = gen_reg_rtx (half_mode);
44781 op1 = gen_reg_rtx (half_mode);
44782 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44783 n >> 2);
44784 ix86_expand_vector_init_interleave (half_mode, op1,
44785 &ops [n >> 1], n >> 2);
44786 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
44787 return;
44789 case V64QImode:
44790 quarter_mode = V16QImode;
44791 half_mode = V32QImode;
44792 goto quarter;
44794 case V32HImode:
44795 quarter_mode = V8HImode;
44796 half_mode = V16HImode;
44797 goto quarter;
44799 quarter:
44800 n = GET_MODE_NUNITS (mode);
44801 for (i = 0; i < n; i++)
44802 ops[i] = XVECEXP (vals, 0, i);
44803 op0 = gen_reg_rtx (quarter_mode);
44804 op1 = gen_reg_rtx (quarter_mode);
44805 op2 = gen_reg_rtx (quarter_mode);
44806 op3 = gen_reg_rtx (quarter_mode);
44807 op4 = gen_reg_rtx (half_mode);
44808 op5 = gen_reg_rtx (half_mode);
44809 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44810 n >> 3);
44811 ix86_expand_vector_init_interleave (quarter_mode, op1,
44812 &ops [n >> 2], n >> 3);
44813 ix86_expand_vector_init_interleave (quarter_mode, op2,
44814 &ops [n >> 1], n >> 3);
44815 ix86_expand_vector_init_interleave (quarter_mode, op3,
44816 &ops [(n >> 1) | (n >> 2)], n >> 3);
44817 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44818 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44819 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
44820 return;
44822 case V16QImode:
44823 if (!TARGET_SSE4_1)
44824 break;
44825 /* FALLTHRU */
44827 case V8HImode:
44828 if (!TARGET_SSE2)
44829 break;
44831 /* Don't use ix86_expand_vector_init_interleave if we can't
44832 move from GPR to SSE register directly. */
44833 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44834 break;
44836 n = GET_MODE_NUNITS (mode);
44837 for (i = 0; i < n; i++)
44838 ops[i] = XVECEXP (vals, 0, i);
44839 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44840 return;
44842 case V4HImode:
44843 case V8QImode:
44844 break;
44846 default:
44847 gcc_unreachable ();
44851 int i, j, n_elts, n_words, n_elt_per_word;
44852 machine_mode inner_mode;
44853 rtx words[4], shift;
44855 inner_mode = GET_MODE_INNER (mode);
44856 n_elts = GET_MODE_NUNITS (mode);
44857 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44858 n_elt_per_word = n_elts / n_words;
44859 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44861 for (i = 0; i < n_words; ++i)
44863 rtx word = NULL_RTX;
44865 for (j = 0; j < n_elt_per_word; ++j)
44867 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44868 elt = convert_modes (word_mode, inner_mode, elt, true);
44870 if (j == 0)
44871 word = elt;
44872 else
44874 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44875 word, 1, OPTAB_LIB_WIDEN);
44876 word = expand_simple_binop (word_mode, IOR, word, elt,
44877 word, 1, OPTAB_LIB_WIDEN);
44881 words[i] = word;
44884 if (n_words == 1)
44885 emit_move_insn (target, gen_lowpart (mode, words[0]));
44886 else if (n_words == 2)
44888 rtx tmp = gen_reg_rtx (mode);
44889 emit_clobber (tmp);
44890 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44891 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44892 emit_move_insn (target, tmp);
44894 else if (n_words == 4)
44896 rtx tmp = gen_reg_rtx (V4SImode);
44897 gcc_assert (word_mode == SImode);
44898 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44899 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44900 emit_move_insn (target, gen_lowpart (mode, tmp));
44902 else
44903 gcc_unreachable ();
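/* Illustrative sketch (editorial, not part of the port): the word
   packing loop above for four byte elements, in plain C.  Element 0
   ends up in the low byte, matching the little-endian lane order the
   loop produces.  `pack_word' is a hypothetical name used only for
   this example.  */

#include <stdint.h>

static uint32_t
pack_word (const uint8_t e[4])
{
  /* Start with the word's last element, then shift each earlier
     element in underneath it.  */
  uint32_t word = e[3];
  for (int j = 1; j < 4; j++)
    word = (word << 8) | e[3 - j];
  return word;
}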
44907 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44908 instructions unless MMX_OK is true. */
44910 void
44911 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44913 machine_mode mode = GET_MODE (target);
44914 machine_mode inner_mode = GET_MODE_INNER (mode);
44915 int n_elts = GET_MODE_NUNITS (mode);
44916 int n_var = 0, one_var = -1;
44917 bool all_same = true, all_const_zero = true;
44918 int i;
44919 rtx x;
44921 for (i = 0; i < n_elts; ++i)
44923 x = XVECEXP (vals, 0, i);
44924 if (!(CONST_SCALAR_INT_P (x)
44925 || CONST_DOUBLE_P (x)
44926 || CONST_FIXED_P (x)))
44927 n_var++, one_var = i;
44928 else if (x != CONST0_RTX (inner_mode))
44929 all_const_zero = false;
44930 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44931 all_same = false;
44934 /* Constants are best loaded from the constant pool. */
44935 if (n_var == 0)
44937 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44938 return;
44941 /* If all values are identical, broadcast the value. */
44942 if (all_same
44943 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44944 XVECEXP (vals, 0, 0)))
44945 return;
44947 /* Values where only one field is non-constant are best loaded from
44948 the pool and overwritten via move later. */
44949 if (n_var == 1)
44951 if (all_const_zero
44952 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44953 XVECEXP (vals, 0, one_var),
44954 one_var))
44955 return;
44957 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44958 return;
44961 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44964 void
44965 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44967 machine_mode mode = GET_MODE (target);
44968 machine_mode inner_mode = GET_MODE_INNER (mode);
44969 machine_mode half_mode;
44970 bool use_vec_merge = false;
44971 rtx tmp;
44972 static rtx (*gen_extract[6][2]) (rtx, rtx)
44974 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44975 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44976 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44977 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44978 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44979 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44981 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44983 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44984 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44985 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44986 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44987 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44988 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44990 int i, j, n;
44991 machine_mode mmode = VOIDmode;
44992 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
44994 switch (mode)
44996 case V2SFmode:
44997 case V2SImode:
44998 if (mmx_ok)
45000 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
45001 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
45002 if (elt == 0)
45003 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
45004 else
45005 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
45006 emit_insn (gen_rtx_SET (target, tmp));
45007 return;
45009 break;
45011 case V2DImode:
45012 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
45013 if (use_vec_merge)
45014 break;
45016 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
45017 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
45018 if (elt == 0)
45019 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
45020 else
45021 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
45022 emit_insn (gen_rtx_SET (target, tmp));
45023 return;
45025 case V2DFmode:
45027 rtx op0, op1;
45029 /* For the two element vectors, we implement a VEC_CONCAT with
45030 the extraction of the other element. */
45032 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
45033 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
45035 if (elt == 0)
45036 op0 = val, op1 = tmp;
45037 else
45038 op0 = tmp, op1 = val;
45040 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
45041 emit_insn (gen_rtx_SET (target, tmp));
45043 return;
45045 case V4SFmode:
45046 use_vec_merge = TARGET_SSE4_1;
45047 if (use_vec_merge)
45048 break;
45050 switch (elt)
45052 case 0:
45053 use_vec_merge = true;
45054 break;
45056 case 1:
45057 /* tmp = target = A B C D */
45058 tmp = copy_to_reg (target);
45059 /* target = A A B B */
45060 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
45061 /* target = X A B B */
45062 ix86_expand_vector_set (false, target, val, 0);
45063 /* target = A X C D */
45064 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45065 const1_rtx, const0_rtx,
45066 GEN_INT (2+4), GEN_INT (3+4)));
45067 return;
45069 case 2:
45070 /* tmp = target = A B C D */
45071 tmp = copy_to_reg (target);
45072 /* tmp = X B C D */
45073 ix86_expand_vector_set (false, tmp, val, 0);
45074 /* target = A B X D */
45075 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45076 const0_rtx, const1_rtx,
45077 GEN_INT (0+4), GEN_INT (3+4)));
45078 return;
45080 case 3:
45081 /* tmp = target = A B C D */
45082 tmp = copy_to_reg (target);
45083 /* tmp = X B C D */
45084 ix86_expand_vector_set (false, tmp, val, 0);
45085 /* target = A B C X */
45086 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45087 const0_rtx, const1_rtx,
45088 GEN_INT (2+4), GEN_INT (0+4)));
45089 return;
45091 default:
45092 gcc_unreachable ();
45094 break;
45096 case V4SImode:
45097 use_vec_merge = TARGET_SSE4_1;
45098 if (use_vec_merge)
45099 break;
45101 /* Element 0 handled by vec_merge below. */
45102 if (elt == 0)
45104 use_vec_merge = true;
45105 break;
45108 if (TARGET_SSE2)
45110 /* With SSE2, use integer shuffles to swap element 0 and ELT,
45111 store into element 0, then shuffle them back. */
45113 rtx order[4];
45115 order[0] = GEN_INT (elt);
45116 order[1] = const1_rtx;
45117 order[2] = const2_rtx;
45118 order[3] = GEN_INT (3);
45119 order[elt] = const0_rtx;
45121 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45122 order[1], order[2], order[3]));
45124 ix86_expand_vector_set (false, target, val, 0);
45126 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45127 order[1], order[2], order[3]));
45129 else
45131 /* For SSE1, we have to reuse the V4SF code. */
45132 rtx t = gen_reg_rtx (V4SFmode);
45133 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
45134 emit_move_insn (target, gen_lowpart (mode, t));
45136 return;
45138 case V8HImode:
45139 use_vec_merge = TARGET_SSE2;
45140 break;
45141 case V4HImode:
45142 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45143 break;
45145 case V16QImode:
45146 use_vec_merge = TARGET_SSE4_1;
45147 break;
45149 case V8QImode:
45150 break;
45152 case V32QImode:
45153 half_mode = V16QImode;
45154 j = 0;
45155 n = 16;
45156 goto half;
45158 case V16HImode:
45159 half_mode = V8HImode;
45160 j = 1;
45161 n = 8;
45162 goto half;
45164 case V8SImode:
45165 half_mode = V4SImode;
45166 j = 2;
45167 n = 4;
45168 goto half;
45170 case V4DImode:
45171 half_mode = V2DImode;
45172 j = 3;
45173 n = 2;
45174 goto half;
45176 case V8SFmode:
45177 half_mode = V4SFmode;
45178 j = 4;
45179 n = 4;
45180 goto half;
45182 case V4DFmode:
45183 half_mode = V2DFmode;
45184 j = 5;
45185 n = 2;
45186 goto half;
45188 half:
45189 /* Compute offset. */
45190 i = elt / n;
45191 elt %= n;
45193 gcc_assert (i <= 1);
45195 /* Extract the half. */
45196 tmp = gen_reg_rtx (half_mode);
45197 emit_insn (gen_extract[j][i] (tmp, target));
45199 /* Put val in tmp at elt. */
45200 ix86_expand_vector_set (false, tmp, val, elt);
45202 /* Put it back. */
45203 emit_insn (gen_insert[j][i] (target, target, tmp));
45204 return;
45206 case V8DFmode:
45207 if (TARGET_AVX512F)
45209 mmode = QImode;
45210 gen_blendm = gen_avx512f_blendmv8df;
45212 break;
45214 case V8DImode:
45215 if (TARGET_AVX512F)
45217 mmode = QImode;
45218 gen_blendm = gen_avx512f_blendmv8di;
45220 break;
45222 case V16SFmode:
45223 if (TARGET_AVX512F)
45225 mmode = HImode;
45226 gen_blendm = gen_avx512f_blendmv16sf;
45228 break;
45230 case V16SImode:
45231 if (TARGET_AVX512F)
45233 mmode = HImode;
45234 gen_blendm = gen_avx512f_blendmv16si;
45236 break;
45238 case V32HImode:
45239 if (TARGET_AVX512F && TARGET_AVX512BW)
45241 mmode = SImode;
45242 gen_blendm = gen_avx512bw_blendmv32hi;
45244 break;
45246 case V64QImode:
45247 if (TARGET_AVX512F && TARGET_AVX512BW)
45249 mmode = DImode;
45250 gen_blendm = gen_avx512bw_blendmv64qi;
45252 break;
45254 default:
45255 break;
45258 if (mmode != VOIDmode)
45260 tmp = gen_reg_rtx (mode);
45261 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
45262 emit_insn (gen_blendm (target, tmp, target,
45263 force_reg (mmode,
45264 gen_int_mode (1 << elt, mmode))));
45266 else if (use_vec_merge)
45268 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45269 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45270 emit_insn (gen_rtx_SET (target, tmp));
45272 else
45274 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45276 emit_move_insn (mem, target);
45278 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45279 emit_move_insn (tmp, val);
45281 emit_move_insn (target, mem);
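/* Illustrative sketch (editorial, not part of the port): the `half'
   path above on a plain array standing in for vector lanes -- extract
   the half that holds lane ELT, set within it, then write the half
   back.  `set_lane_via_half' is a hypothetical name used only for this
   example.  */

static void
set_lane_via_half (int *v, int nlanes, int elt, int val)
{
  int half[16];			/* Enough for up to 32 lanes.  */
  int n = nlanes / 2;
  int hi = elt / n;		/* Which half holds the lane.  */

  for (int k = 0; k < n; k++)	/* Extract that half.  */
    half[k] = v[hi * n + k];
  half[elt % n] = val;		/* Set within the half.  */
  for (int k = 0; k < n; k++)	/* Put it back.  */
    v[hi * n + k] = half[k];
}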
45285 void
45286 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45288 machine_mode mode = GET_MODE (vec);
45289 machine_mode inner_mode = GET_MODE_INNER (mode);
45290 bool use_vec_extr = false;
45291 rtx tmp;
45293 switch (mode)
45295 case V2SImode:
45296 case V2SFmode:
45297 if (!mmx_ok)
45298 break;
45299 /* FALLTHRU */
45301 case V2DFmode:
45302 case V2DImode:
45303 use_vec_extr = true;
45304 break;
45306 case V4SFmode:
45307 use_vec_extr = TARGET_SSE4_1;
45308 if (use_vec_extr)
45309 break;
45311 switch (elt)
45313 case 0:
45314 tmp = vec;
45315 break;
45317 case 1:
45318 case 3:
45319 tmp = gen_reg_rtx (mode);
45320 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45321 GEN_INT (elt), GEN_INT (elt),
45322 GEN_INT (elt+4), GEN_INT (elt+4)));
45323 break;
45325 case 2:
45326 tmp = gen_reg_rtx (mode);
45327 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45328 break;
45330 default:
45331 gcc_unreachable ();
45333 vec = tmp;
45334 use_vec_extr = true;
45335 elt = 0;
45336 break;
45338 case V4SImode:
45339 use_vec_extr = TARGET_SSE4_1;
45340 if (use_vec_extr)
45341 break;
45343 if (TARGET_SSE2)
45345 switch (elt)
45347 case 0:
45348 tmp = vec;
45349 break;
45351 case 1:
45352 case 3:
45353 tmp = gen_reg_rtx (mode);
45354 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45355 GEN_INT (elt), GEN_INT (elt),
45356 GEN_INT (elt), GEN_INT (elt)));
45357 break;
45359 case 2:
45360 tmp = gen_reg_rtx (mode);
45361 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45362 break;
45364 default:
45365 gcc_unreachable ();
45367 vec = tmp;
45368 use_vec_extr = true;
45369 elt = 0;
45371 else
45373 /* For SSE1, we have to reuse the V4SF code. */
45374 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45375 gen_lowpart (V4SFmode, vec), elt);
45376 return;
45378 break;
45380 case V8HImode:
45381 use_vec_extr = TARGET_SSE2;
45382 break;
45383 case V4HImode:
45384 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45385 break;
45387 case V16QImode:
45388 use_vec_extr = TARGET_SSE4_1;
45389 break;
45391 case V8SFmode:
45392 if (TARGET_AVX)
45394 tmp = gen_reg_rtx (V4SFmode);
45395 if (elt < 4)
45396 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45397 else
45398 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45399 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45400 return;
45402 break;
45404 case V4DFmode:
45405 if (TARGET_AVX)
45407 tmp = gen_reg_rtx (V2DFmode);
45408 if (elt < 2)
45409 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45410 else
45411 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45412 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45413 return;
45415 break;
45417 case V32QImode:
45418 if (TARGET_AVX)
45420 tmp = gen_reg_rtx (V16QImode);
45421 if (elt < 16)
45422 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45423 else
45424 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45425 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45426 return;
45428 break;
45430 case V16HImode:
45431 if (TARGET_AVX)
45433 tmp = gen_reg_rtx (V8HImode);
45434 if (elt < 8)
45435 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45436 else
45437 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45438 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45439 return;
45441 break;
45443 case V8SImode:
45444 if (TARGET_AVX)
45446 tmp = gen_reg_rtx (V4SImode);
45447 if (elt < 4)
45448 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45449 else
45450 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45451 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45452 return;
45454 break;
45456 case V4DImode:
45457 if (TARGET_AVX)
45459 tmp = gen_reg_rtx (V2DImode);
45460 if (elt < 2)
45461 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45462 else
45463 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45464 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45465 return;
45467 break;
45469 case V32HImode:
45470 if (TARGET_AVX512BW)
45472 tmp = gen_reg_rtx (V16HImode);
45473 if (elt < 16)
45474 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45475 else
45476 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45477 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45478 return;
45480 break;
45482 case V64QImode:
45483 if (TARGET_AVX512BW)
45485 tmp = gen_reg_rtx (V32QImode);
45486 if (elt < 32)
45487 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45488 else
45489 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45490 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45491 return;
45493 break;
45495 case V16SFmode:
45496 tmp = gen_reg_rtx (V8SFmode);
45497 if (elt < 8)
45498 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45499 else
45500 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45501 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45502 return;
45504 case V8DFmode:
45505 tmp = gen_reg_rtx (V4DFmode);
45506 if (elt < 4)
45507 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45508 else
45509 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45510 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45511 return;
45513 case V16SImode:
45514 tmp = gen_reg_rtx (V8SImode);
45515 if (elt < 8)
45516 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45517 else
45518 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45519 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45520 return;
45522 case V8DImode:
45523 tmp = gen_reg_rtx (V4DImode);
45524 if (elt < 4)
45525 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45526 else
45527 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45528 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45529 return;
45531 case V8QImode:
45532 /* ??? Could extract the appropriate HImode element and shift. */
45533 default:
45534 break;
45537 if (use_vec_extr)
45539 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45540 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45542 /* Let the rtl optimizers know about the zero extension performed. */
45543 if (inner_mode == QImode || inner_mode == HImode)
45545 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45546 target = gen_lowpart (SImode, target);
45549 emit_insn (gen_rtx_SET (target, tmp));
45551 else
45553 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45555 emit_move_insn (mem, vec);
45557 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45558 emit_move_insn (target, tmp);
45562 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45563 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45564 The upper bits of DEST are undefined, though they shouldn't cause
45565 exceptions (some bits from src or all zeros are ok). */
45567 static void
45568 emit_reduc_half (rtx dest, rtx src, int i)
45570 rtx tem, d = dest;
45571 switch (GET_MODE (src))
45573 case V4SFmode:
45574 if (i == 128)
45575 tem = gen_sse_movhlps (dest, src, src);
45576 else
45577 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45578 GEN_INT (1 + 4), GEN_INT (1 + 4));
45579 break;
45580 case V2DFmode:
45581 tem = gen_vec_interleave_highv2df (dest, src, src);
45582 break;
45583 case V16QImode:
45584 case V8HImode:
45585 case V4SImode:
45586 case V2DImode:
45587 d = gen_reg_rtx (V1TImode);
45588 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45589 GEN_INT (i / 2));
45590 break;
45591 case V8SFmode:
45592 if (i == 256)
45593 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45594 else
45595 tem = gen_avx_shufps256 (dest, src, src,
45596 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45597 break;
45598 case V4DFmode:
45599 if (i == 256)
45600 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45601 else
45602 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45603 break;
45604 case V32QImode:
45605 case V16HImode:
45606 case V8SImode:
45607 case V4DImode:
45608 if (i == 256)
45610 if (GET_MODE (dest) != V4DImode)
45611 d = gen_reg_rtx (V4DImode);
45612 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45613 gen_lowpart (V4DImode, src),
45614 const1_rtx);
45616 else
45618 d = gen_reg_rtx (V2TImode);
45619 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45620 GEN_INT (i / 2));
45622 break;
45623 case V64QImode:
45624 case V32HImode:
45625 case V16SImode:
45626 case V16SFmode:
45627 case V8DImode:
45628 case V8DFmode:
45629 if (i > 128)
45630 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45631 gen_lowpart (V16SImode, src),
45632 gen_lowpart (V16SImode, src),
45633 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45634 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45635 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45636 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45637 GEN_INT (0xC), GEN_INT (0xD),
45638 GEN_INT (0xE), GEN_INT (0xF),
45639 GEN_INT (0x10), GEN_INT (0x11),
45640 GEN_INT (0x12), GEN_INT (0x13),
45641 GEN_INT (0x14), GEN_INT (0x15),
45642 GEN_INT (0x16), GEN_INT (0x17));
45643 else
45644 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45645 gen_lowpart (V16SImode, src),
45646 GEN_INT (i == 128 ? 0x2 : 0x1),
45647 GEN_INT (0x3),
45648 GEN_INT (0x3),
45649 GEN_INT (0x3),
45650 GEN_INT (i == 128 ? 0x6 : 0x5),
45651 GEN_INT (0x7),
45652 GEN_INT (0x7),
45653 GEN_INT (0x7),
45654 GEN_INT (i == 128 ? 0xA : 0x9),
45655 GEN_INT (0xB),
45656 GEN_INT (0xB),
45657 GEN_INT (0xB),
45658 GEN_INT (i == 128 ? 0xE : 0xD),
45659 GEN_INT (0xF),
45660 GEN_INT (0xF),
45661 GEN_INT (0xF));
45662 break;
45663 default:
45664 gcc_unreachable ();
45666 emit_insn (tem);
45667 if (d != dest)
45668 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45671 /* Expand a vector reduction. FN is the binary pattern to reduce;
45672 DEST is the destination; IN is the input vector. */
45674 void
45675 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45677 rtx half, dst, vec = in;
45678 machine_mode mode = GET_MODE (in);
45679 int i;
45681 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45682 if (TARGET_SSE4_1
45683 && mode == V8HImode
45684 && fn == gen_uminv8hi3)
45686 emit_insn (gen_sse4_1_phminposuw (dest, in));
45687 return;
45690 for (i = GET_MODE_BITSIZE (mode);
45691 i > GET_MODE_UNIT_BITSIZE (mode);
45692 i >>= 1)
45694 half = gen_reg_rtx (mode);
45695 emit_reduc_half (half, vec, i);
45696 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
45697 dst = dest;
45698 else
45699 dst = gen_reg_rtx (mode);
45700 emit_insn (fn (dst, half, vec));
45701 vec = dst;
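/* Illustrative sketch (editorial, not part of the port): the halving
   reduction above on a plain array standing in for vector lanes.  Each
   pass folds the upper half onto the lower half; the vector version
   performs all the combines of a pass with a single instruction.
   `reduce_max' is a hypothetical name used only for this example.  */

static int
reduce_max (int *lanes, int n)	/* N must be a power of two.  */
{
  for (int half = n / 2; half >= 1; half /= 2)
    for (int i = 0; i < half; i++)
      if (lanes[i + half] > lanes[i])
	lanes[i] = lanes[i + half];
  return lanes[0];
}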
45705 /* Target hook for scalar_mode_supported_p. */
45706 static bool
45707 ix86_scalar_mode_supported_p (machine_mode mode)
45709 if (DECIMAL_FLOAT_MODE_P (mode))
45710 return default_decimal_float_supported_p ();
45711 else if (mode == TFmode)
45712 return true;
45713 else
45714 return default_scalar_mode_supported_p (mode);
45717 /* Implements target hook vector_mode_supported_p. */
45718 static bool
45719 ix86_vector_mode_supported_p (machine_mode mode)
45721 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45722 return true;
45723 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45724 return true;
45725 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45726 return true;
45727 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45728 return true;
45729 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45730 return true;
45731 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45732 return true;
45733 return false;
45736 /* Implement target hook libgcc_floating_mode_supported_p. */
45737 static bool
45738 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45740 switch (mode)
45742 case SFmode:
45743 case DFmode:
45744 case XFmode:
45745 return true;
45747 case TFmode:
45748 #ifdef IX86_NO_LIBGCC_TFMODE
45749 return false;
45750 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45751 return TARGET_LONG_DOUBLE_128;
45752 #else
45753 return true;
45754 #endif
45756 default:
45757 return false;
45761 /* Target hook for c_mode_for_suffix. */
45762 static machine_mode
45763 ix86_c_mode_for_suffix (char suffix)
45765 if (suffix == 'q')
45766 return TFmode;
45767 if (suffix == 'w')
45768 return XFmode;
45770 return VOIDmode;
45773 /* Worker function for TARGET_MD_ASM_ADJUST.
45775 We implement asm flag outputs, and maintain source compatibility
45776 with the old cc0-based compiler. */
45778 static rtx_insn *
45779 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
45780 vec<const char *> &constraints,
45781 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
45783 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
45784 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
45786 bool saw_asm_flag = false;
45788 start_sequence ();
45789 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
45791 const char *con = constraints[i];
45792 if (strncmp (con, "=@cc", 4) != 0)
45793 continue;
45794 con += 4;
45795 if (strchr (con, ',') != NULL)
45797 error ("alternatives not allowed in asm flag output");
45798 continue;
45801 bool invert = false;
45802 if (con[0] == 'n')
45803 invert = true, con++;
45805 machine_mode mode = CCmode;
45806 rtx_code code = UNKNOWN;
45808 switch (con[0])
45810 case 'a':
45811 if (con[1] == 0)
45812 mode = CCAmode, code = EQ;
45813 else if (con[1] == 'e' && con[2] == 0)
45814 mode = CCCmode, code = EQ;
45815 break;
45816 case 'b':
45817 if (con[1] == 0)
45818 mode = CCCmode, code = EQ;
45819 else if (con[1] == 'e' && con[2] == 0)
45820 mode = CCAmode, code = NE;
45821 break;
45822 case 'c':
45823 if (con[1] == 0)
45824 mode = CCCmode, code = EQ;
45825 break;
45826 case 'e':
45827 if (con[1] == 0)
45828 mode = CCZmode, code = EQ;
45829 break;
45830 case 'g':
45831 if (con[1] == 0)
45832 mode = CCGCmode, code = GT;
45833 else if (con[1] == 'e' && con[2] == 0)
45834 mode = CCGCmode, code = GE;
45835 break;
45836 case 'l':
45837 if (con[1] == 0)
45838 mode = CCGCmode, code = LT;
45839 else if (con[1] == 'e' && con[2] == 0)
45840 mode = CCGCmode, code = LE;
45841 break;
45842 case 'o':
45843 if (con[1] == 0)
45844 mode = CCOmode, code = EQ;
45845 break;
45846 case 'p':
45847 if (con[1] == 0)
45848 mode = CCPmode, code = EQ;
45849 break;
45850 case 's':
45851 if (con[1] == 0)
45852 mode = CCSmode, code = EQ;
45853 break;
45854 case 'z':
45855 if (con[1] == 0)
45856 mode = CCZmode, code = EQ;
45857 break;
45859 if (code == UNKNOWN)
45861 error ("unknown asm flag output %qs", constraints[i]);
45862 continue;
45864 if (invert)
45865 code = reverse_condition (code);
45867 rtx dest = outputs[i];
45868 if (!saw_asm_flag)
45870 /* This is the first asm flag output. Here we put the flags
45871 register in as the real output and adjust the condition to
45872 allow it. */
45873 constraints[i] = "=Bf";
45874 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
45875 saw_asm_flag = true;
45877 else
45879 /* We don't need the flags register as output twice. */
45880 constraints[i] = "=X";
45881 outputs[i] = gen_rtx_SCRATCH (SImode);
45884 rtx x = gen_rtx_REG (mode, FLAGS_REG);
45885 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
45887 machine_mode dest_mode = GET_MODE (dest);
45888 if (!SCALAR_INT_MODE_P (dest_mode))
45890 error ("invalid type for asm flag output");
45891 continue;
45894 if (dest_mode == DImode && !TARGET_64BIT)
45895 dest_mode = SImode;
45897 if (dest_mode != QImode)
45899 rtx destqi = gen_reg_rtx (QImode);
45900 emit_insn (gen_rtx_SET (destqi, x));
45902 if (TARGET_ZERO_EXTEND_WITH_AND
45903 && optimize_function_for_speed_p (cfun))
45905 x = force_reg (dest_mode, const0_rtx);
45907 emit_insn (gen_movstrictqi
45908 (gen_lowpart (QImode, x), destqi));
45910 else
45911 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
45914 if (dest_mode != GET_MODE (dest))
45916 rtx tmp = gen_reg_rtx (SImode);
45918 emit_insn (gen_rtx_SET (tmp, x));
45919 emit_insn (gen_zero_extendsidi2 (dest, tmp));
45921 else
45922 emit_insn (gen_rtx_SET (dest, x));
45924 rtx_insn *seq = get_insns ();
45925 end_sequence ();
45927 if (saw_asm_flag)
45928 return seq;
45929 else
45931 /* If we had no asm flag outputs, clobber the flags. */
45932 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
45933 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
45934 return NULL;
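/* Usage sketch (editorial, not part of the port): what the "=@cc"
   handling above enables in user code -- a flag output captures a
   condition-code bit directly, with no explicit SETcc.  `bit_test' is
   a hypothetical name used only for this example.  */

static int
bit_test (unsigned *word, unsigned bit)
{
  int carry;
  /* "=@ccc" requests the carry flag, which BT sets to the tested bit.  */
  __asm__ ("btl %2, %1" : "=@ccc" (carry) : "m" (*word), "r" (bit));
  return carry;
}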
45938 /* Implements the target hook targetm.asm.encode_section_info. */
45940 static void ATTRIBUTE_UNUSED
45941 ix86_encode_section_info (tree decl, rtx rtl, int first)
45943 default_encode_section_info (decl, rtl, first);
45945 if (ix86_in_large_data_p (decl))
45946 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45949 /* Worker function for REVERSE_CONDITION. */
45951 enum rtx_code
45952 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45954 return (mode != CCFPmode && mode != CCFPUmode
45955 ? reverse_condition (code)
45956 : reverse_condition_maybe_unordered (code));
45959 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45960 to OPERANDS[0]. */
45962 const char *
45963 output_387_reg_move (rtx insn, rtx *operands)
45965 if (REG_P (operands[0]))
45967 if (REG_P (operands[1])
45968 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45970 if (REGNO (operands[0]) == FIRST_STACK_REG)
45971 return output_387_ffreep (operands, 0);
45972 return "fstp\t%y0";
45974 if (STACK_TOP_P (operands[0]))
45975 return "fld%Z1\t%y1";
45976 return "fst\t%y0";
45978 else if (MEM_P (operands[0]))
45980 gcc_assert (REG_P (operands[1]));
45981 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45982 return "fstp%Z0\t%y0";
45983 else
45985 /* There is no non-popping store to memory for XFmode.
45986 So if we need one, follow the store with a load. */
45987 if (GET_MODE (operands[0]) == XFmode)
45988 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45989 else
45990 return "fst%Z0\t%y0";
45993 else
45994 gcc_unreachable ();
45997 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45998 FP status register is set. */
46000 void
46001 ix86_emit_fp_unordered_jump (rtx label)
46003 rtx reg = gen_reg_rtx (HImode);
46004 rtx temp;
46006 emit_insn (gen_x86_fnstsw_1 (reg));
46008 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
46010 emit_insn (gen_x86_sahf_1 (reg));
46012 temp = gen_rtx_REG (CCmode, FLAGS_REG);
46013 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
46015 else
46017 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
46019 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
46020 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
46023 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
46024 gen_rtx_LABEL_REF (VOIDmode, label),
46025 pc_rtx);
46026 temp = gen_rtx_SET (pc_rtx, temp);
46028 emit_jump_insn (temp);
46029 predict_jump (REG_BR_PROB_BASE * 10 / 100);
46032 /* Output code to perform a log1p XFmode calculation. */
46034 void ix86_emit_i387_log1p (rtx op0, rtx op1)
46036 rtx_code_label *label1 = gen_label_rtx ();
46037 rtx_code_label *label2 = gen_label_rtx ();
46039 rtx tmp = gen_reg_rtx (XFmode);
46040 rtx tmp2 = gen_reg_rtx (XFmode);
46041 rtx test;
46043 emit_insn (gen_absxf2 (tmp, op1));
46044 test = gen_rtx_GE (VOIDmode, tmp,
46045 CONST_DOUBLE_FROM_REAL_VALUE (
46046 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
46047 XFmode));
46048 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
46050 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
46051 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
46052 emit_jump (label2);
46054 emit_label (label1);
46055 emit_move_insn (tmp, CONST1_RTX (XFmode));
46056 emit_insn (gen_addxf3 (tmp, op1, tmp));
46057 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
46058 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
46060 emit_label (label2);
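/* Illustrative sketch (editorial, not part of the port): the branch
   above in plain C.  fyl2xp1 evaluates y * log2(x + 1) without forming
   1 + x, which avoids cancellation for small x; past the cutoff
   1 - sqrt(2)/2 ~= 0.2929 it is safe to form 1 + x and use fyl2x.
   `log1p_sketch' is a hypothetical name used only for this example.  */

#include <math.h>

static long double
log1p_sketch (long double x)
{
  const long double ln2 = 0.6931471805599453094L;	/* fldln2 */

  if (fabsl (x) < 0.29289321881345247561L)
    /* Small |x|: log1pl stands in for fldln2; fyl2xp1.  */
    return log1pl (x);

  /* Larger |x|: forming 1 + x loses nothing significant, so use the
     identity log1p(x) = ln(2) * log2(1 + x) (fld1; fadd; fyl2x).  */
  return ln2 * log2l (1.0L + x);
}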
46063 /* Emit code for round calculation. */
46064 void ix86_emit_i387_round (rtx op0, rtx op1)
46066 machine_mode inmode = GET_MODE (op1);
46067 machine_mode outmode = GET_MODE (op0);
46068 rtx e1, e2, res, tmp, tmp1, half;
46069 rtx scratch = gen_reg_rtx (HImode);
46070 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
46071 rtx_code_label *jump_label = gen_label_rtx ();
46072 rtx insn;
46073 rtx (*gen_abs) (rtx, rtx);
46074 rtx (*gen_neg) (rtx, rtx);
46076 switch (inmode)
46078 case SFmode:
46079 gen_abs = gen_abssf2;
46080 break;
46081 case DFmode:
46082 gen_abs = gen_absdf2;
46083 break;
46084 case XFmode:
46085 gen_abs = gen_absxf2;
46086 break;
46087 default:
46088 gcc_unreachable ();
46091 switch (outmode)
46093 case SFmode:
46094 gen_neg = gen_negsf2;
46095 break;
46096 case DFmode:
46097 gen_neg = gen_negdf2;
46098 break;
46099 case XFmode:
46100 gen_neg = gen_negxf2;
46101 break;
46102 case HImode:
46103 gen_neg = gen_neghi2;
46104 break;
46105 case SImode:
46106 gen_neg = gen_negsi2;
46107 break;
46108 case DImode:
46109 gen_neg = gen_negdi2;
46110 break;
46111 default:
46112 gcc_unreachable ();
46115 e1 = gen_reg_rtx (inmode);
46116 e2 = gen_reg_rtx (inmode);
46117 res = gen_reg_rtx (outmode);
46119 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
46121 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
46123 /* scratch = fxam(op1) */
46124 emit_insn (gen_rtx_SET (scratch,
46125 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
46126 UNSPEC_FXAM)));
46127 /* e1 = fabs(op1) */
46128 emit_insn (gen_abs (e1, op1));
46130 /* e2 = e1 + 0.5 */
46131 half = force_reg (inmode, half);
46132 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
46134 /* res = floor(e2) */
46135 if (inmode != XFmode)
46137 tmp1 = gen_reg_rtx (XFmode);
46139 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
46141 else
46142 tmp1 = e2;
46144 switch (outmode)
46146 case SFmode:
46147 case DFmode:
46149 rtx tmp0 = gen_reg_rtx (XFmode);
46151 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
46153 emit_insn (gen_rtx_SET (res,
46154 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
46155 UNSPEC_TRUNC_NOOP)));
46157 break;
46158 case XFmode:
46159 emit_insn (gen_frndintxf2_floor (res, tmp1));
46160 break;
46161 case HImode:
46162 emit_insn (gen_lfloorxfhi2 (res, tmp1));
46163 break;
46164 case SImode:
46165 emit_insn (gen_lfloorxfsi2 (res, tmp1));
46166 break;
46167 case DImode:
46168 emit_insn (gen_lfloorxfdi2 (res, tmp1));
46169 break;
46170 default:
46171 gcc_unreachable ();
46174 /* flags = signbit(a) */
46175 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
46177 /* if (flags) then res = -res */
46178 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
46179 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
46180 gen_rtx_LABEL_REF (VOIDmode, jump_label),
46181 pc_rtx);
46182 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46183 predict_jump (REG_BR_PROB_BASE * 50 / 100);
46184 JUMP_LABEL (insn) = jump_label;
46186 emit_insn (gen_neg (res, res));
46188 emit_label (jump_label);
46189 LABEL_NUSES (jump_label) = 1;
46191 emit_move_insn (op0, res);
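/* Editorial sketch, not part of GCC: a plain-C model of the sequence
   above, assuming <math.h>.  The fxam sign bit drives the final
   negation, so e.g. -0.5 rounds to -1.0 (halfway away from zero).  */

static double
i387_round_sketch (double a)
{
  double r = floor (fabs (a) + 0.5);  /* floor (|a| + 0.5) */
  return signbit (a) ? -r : r;        /* negate if the sign bit is set */
}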
46194 /* Output code to perform a Newton-Raphson approximation of a single-precision
46195 floating-point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
46197 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
46199 rtx x0, x1, e0, e1;
46201 x0 = gen_reg_rtx (mode);
46202 e0 = gen_reg_rtx (mode);
46203 e1 = gen_reg_rtx (mode);
46204 x1 = gen_reg_rtx (mode);
46206 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
46208 b = force_reg (mode, b);
46210 /* x0 = rcp(b) estimate */
46211 if (mode == V16SFmode || mode == V8DFmode)
46212 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46213 UNSPEC_RCP14)));
46214 else
46215 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46216 UNSPEC_RCP)));
46218 /* e0 = x0 * b */
46219 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
46221 /* e0 = x0 * e0 */
46222 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
46224 /* e1 = x0 + x0 */
46225 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
46227 /* x1 = e1 - e0 */
46228 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
46230 /* res = a * x1 */
46231 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
46234 /* Output code to perform a Newton-Raphson approximation of a
46235 single-precision floating-point [reciprocal] square root. */
46237 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
46238 bool recip)
46240 rtx x0, e0, e1, e2, e3, mthree, mhalf;
46241 REAL_VALUE_TYPE r;
46242 int unspec;
46244 x0 = gen_reg_rtx (mode);
46245 e0 = gen_reg_rtx (mode);
46246 e1 = gen_reg_rtx (mode);
46247 e2 = gen_reg_rtx (mode);
46248 e3 = gen_reg_rtx (mode);
46250 real_from_integer (&r, VOIDmode, -3, SIGNED);
46251 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46253 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
46254 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46255 unspec = UNSPEC_RSQRT;
46257 if (VECTOR_MODE_P (mode))
46259 mthree = ix86_build_const_vector (mode, true, mthree);
46260 mhalf = ix86_build_const_vector (mode, true, mhalf);
46261 /* There is no 512-bit rsqrt. There is however rsqrt14. */
46262 if (GET_MODE_SIZE (mode) == 64)
46263 unspec = UNSPEC_RSQRT14;
46266 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
46267 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
46269 a = force_reg (mode, a);
46271 /* x0 = rsqrt(a) estimate */
46272 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
46273 unspec)));
46275 /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt(0.0) does not become 0 * inf = NaN. */
46276 if (!recip)
46278 rtx zero, mask;
46280 zero = gen_reg_rtx (mode);
46281 mask = gen_reg_rtx (mode);
46283 zero = force_reg (mode, CONST0_RTX(mode));
46285 /* Handle masked compare. */
46286 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
46288 mask = gen_reg_rtx (HImode);
46289 /* Imm value 0x4 corresponds to not-equal comparison. */
46290 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
46291 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
46293 else
46295 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
46297 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
46301 /* e0 = x0 * a */
46302 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
46303 /* e1 = e0 * x0 */
46304 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
46306 /* e2 = e1 - 3. */
46307 mthree = force_reg (mode, mthree);
46308 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
46310 mhalf = force_reg (mode, mhalf);
46311 if (recip)
46312 /* e3 = -.5 * x0 */
46313 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
46314 else
46315 /* e3 = -.5 * e0 */
46316 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
46317 /* ret = e2 * e3 */
46318 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
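/* Editorial sketch, not part of GCC: one Newton-Raphson step on the
   rsqrt estimate, matching the two formulas in the comment above.
   rsqrt_estimate is a hypothetical stand-in for RSQRTSS/VRSQRT14;
   the zero filtering for the sqrt case is omitted here.  */

static float
swsqrt_sketch (float a, int recip, float (*rsqrt_estimate) (float))
{
  float x0 = rsqrt_estimate (a);         /* ~12-bit 1/sqrt(a) estimate */
  float e0 = x0 * a;                     /* e0 = x0 * a */
  float e1 = e0 * x0;                    /* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;                  /* e2 = a * x0 * x0 - 3.0 */
  float e3 = -0.5f * (recip ? x0 : e0);  /* -.5 * x0 or -.5 * e0 */
  return e2 * e3;                        /* refined rsqrt(a) or sqrt(a) */
}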
46321 #ifdef TARGET_SOLARIS
46322 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
46324 static void
46325 i386_solaris_elf_named_section (const char *name, unsigned int flags,
46326 tree decl)
46328 /* With Binutils 2.15, the "@unwind" marker must be specified on
46329 every occurrence of the ".eh_frame" section, not just the first
46330 one. */
46331 if (TARGET_64BIT
46332 && strcmp (name, ".eh_frame") == 0)
46334 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
46335 flags & SECTION_WRITE ? "aw" : "a");
46336 return;
46339 #ifndef USE_GAS
46340 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
46342 solaris_elf_asm_comdat_section (name, flags, decl);
46343 return;
46345 #endif
46347 default_elf_asm_named_section (name, flags, decl);
46349 #endif /* TARGET_SOLARIS */
46351 /* Return the mangling of TYPE if it is an extended fundamental type. */
46353 static const char *
46354 ix86_mangle_type (const_tree type)
46356 type = TYPE_MAIN_VARIANT (type);
46358 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
46359 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
46360 return NULL;
46362 switch (TYPE_MODE (type))
46364 case TFmode:
46365 /* __float128 is "g". */
46366 return "g";
46367 case XFmode:
46368 /* "long double" or __float80 is "e". */
46369 return "e";
46370 default:
46371 return NULL;
46375 /* For 32-bit code we can save PIC register setup by using
46376 the hidden function __stack_chk_fail_local instead of calling
46377 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
46378 register, so it is better to call __stack_chk_fail directly. */
46380 static tree ATTRIBUTE_UNUSED
46381 ix86_stack_protect_fail (void)
46383 return TARGET_64BIT
46384 ? default_external_stack_protect_fail ()
46385 : default_hidden_stack_protect_fail ();
46388 /* Select a format to encode pointers in exception handling data. CODE
46389 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46390 true if the symbol may be affected by dynamic relocations.
46392 ??? All x86 object file formats are capable of representing this.
46393 After all, the relocation needed is the same as for the call insn.
46394 Whether or not a particular assembler allows us to enter such, I
46395 guess we'll have to see. */
46396 int
46397 asm_preferred_eh_data_format (int code, int global)
46399 if (flag_pic)
46401 int type = DW_EH_PE_sdata8;
46402 if (!TARGET_64BIT
46403 || ix86_cmodel == CM_SMALL_PIC
46404 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46405 type = DW_EH_PE_sdata4;
46406 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46408 if (ix86_cmodel == CM_SMALL
46409 || (ix86_cmodel == CM_MEDIUM && code))
46410 return DW_EH_PE_udata4;
46411 return DW_EH_PE_absptr;
46414 /* Expand copysign from SIGN to the positive value ABS_VALUE
46415 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46416 the sign-bit. */
46417 static void
46418 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46420 machine_mode mode = GET_MODE (sign);
46421 rtx sgn = gen_reg_rtx (mode);
46422 if (mask == NULL_RTX)
46424 machine_mode vmode;
46426 if (mode == SFmode)
46427 vmode = V4SFmode;
46428 else if (mode == DFmode)
46429 vmode = V2DFmode;
46430 else
46431 vmode = mode;
46433 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46434 if (!VECTOR_MODE_P (mode))
46436 /* We need to generate a scalar mode mask in this case. */
46437 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46438 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46439 mask = gen_reg_rtx (mode);
46440 emit_insn (gen_rtx_SET (mask, tmp));
46443 else
46444 mask = gen_rtx_NOT (mode, mask);
46445 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
46446 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
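/* Editorial sketch, not part of GCC: the AND/IOR pair above is the
   scalar bit trick below, assuming <string.h> and IEEE double layout;
   abs_value must already have a clear sign bit.  */

static double
copysign_to_positive_sketch (double abs_value, double sign)
{
  unsigned long long a, s;
  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & 0x8000000000000000ULL;  /* sgn = mask & sign; res = abs | sgn */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}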
46449 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46450 mask for masking out the sign-bit is stored in *SMASK, if that is
46451 non-null. */
46452 static rtx
46453 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46455 machine_mode vmode, mode = GET_MODE (op0);
46456 rtx xa, mask;
46458 xa = gen_reg_rtx (mode);
46459 if (mode == SFmode)
46460 vmode = V4SFmode;
46461 else if (mode == DFmode)
46462 vmode = V2DFmode;
46463 else
46464 vmode = mode;
46465 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46466 if (!VECTOR_MODE_P (mode))
46468 /* We need to generate a scalar mode mask in this case. */
46469 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46470 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46471 mask = gen_reg_rtx (mode);
46472 emit_insn (gen_rtx_SET (mask, tmp));
46474 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
46476 if (smask)
46477 *smask = mask;
46479 return xa;
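/* Editorial sketch, not part of GCC: the same operation on a scalar,
   assuming <string.h> and IEEE double layout -- fabs is an AND with
   the complement of the sign-bit mask.  */

static double
sse_fabs_sketch (double x)
{
  unsigned long long bits;
  memcpy (&bits, &x, sizeof bits);
  bits &= ~0x8000000000000000ULL;  /* clear the sign bit */
  memcpy (&x, &bits, sizeof bits);
  return x;
}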
46482 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46483 swapping the operands if SWAP_OPERANDS is true. The expanded
46484 code is a forward jump to a newly created label in case the
46485 comparison is true. The generated label rtx is returned. */
46486 static rtx_code_label *
46487 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46488 bool swap_operands)
46490 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46491 rtx_code_label *label;
46492 rtx tmp;
46494 if (swap_operands)
46495 std::swap (op0, op1);
46497 label = gen_label_rtx ();
46498 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46499 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46500 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46501 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46502 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46503 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46504 JUMP_LABEL (tmp) = label;
46506 return label;
46509 /* Expand a mask-generating SSE comparison instruction comparing OP0 with OP1
46510 using comparison code CODE. Operands are swapped for the comparison if
46511 SWAP_OPERANDS is true. Returns an rtx for the generated mask. */
46512 static rtx
46513 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46514 bool swap_operands)
46516 rtx (*insn)(rtx, rtx, rtx, rtx);
46517 machine_mode mode = GET_MODE (op0);
46518 rtx mask = gen_reg_rtx (mode);
46520 if (swap_operands)
46521 std::swap (op0, op1);
46523 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46525 emit_insn (insn (mask, op0, op1,
46526 gen_rtx_fmt_ee (code, mode, op0, op1)));
46527 return mask;
46530 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46531 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46532 static rtx
46533 ix86_gen_TWO52 (machine_mode mode)
46535 REAL_VALUE_TYPE TWO52r;
46536 rtx TWO52;
46538 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46539 TWO52 = const_double_from_real_value (TWO52r, mode);
46540 TWO52 = force_reg (mode, TWO52);
46542 return TWO52;
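/* Editorial sketch, not part of GCC: the TWO52 value feeds the classic
   add-and-subtract rounding trick used by the expanders below.  For a
   nonnegative double below 2**52, adding 2**52 pushes all fraction
   bits out of the significand, so the subsequent subtraction leaves
   the value rounded to an integer in the current rounding mode.  */

static double
two52_trick_sketch (double nonneg_below_two52)
{
  volatile double t = nonneg_below_two52 + 4503599627370496.0; /* 2**52 */
  return t - 4503599627370496.0;  /* volatile guards against
                                     -ffast-math reassociation */
}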
46545 /* Expand SSE sequence for computing lround from OP1 storing
46546 into OP0. */
46547 void
46548 ix86_expand_lround (rtx op0, rtx op1)
46550 /* C code for the stuff we're doing below:
46551 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
46552 return (long)tmp;
46554 machine_mode mode = GET_MODE (op1);
46555 const struct real_format *fmt;
46556 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46557 rtx adj;
46559 /* load nextafter (0.5, 0.0) */
46560 fmt = REAL_MODE_FORMAT (mode);
46561 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46562 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46564 /* adj = copysign (nextafter (0.5, 0.0), op1) */
46565 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46566 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46568 /* adj = op1 + adj */
46569 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46571 /* op0 = (imode)adj */
46572 expand_fix (op0, adj, 0);
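/* Editorial sketch, not part of GCC: the constant is
   nextafter (0.5, 0.0) rather than 0.5 so that inputs just below 0.5,
   such as 0.49999999999999994 (the largest double below 0.5), are not
   dragged up to 1 by the rounding of x + 0.5.  Assuming <math.h>:  */

static long
lround_sketch (double x)
{
  double pred_half = nextafter (0.5, 0.0);     /* largest double < 0.5 */
  return (long) (x + copysign (pred_half, x)); /* truncating conversion */
}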
46575 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
46576 into OP0. */
46577 void
46578 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46580 /* C code for the stuff we're doing below (for do_floor):
46581 xi = (long)op1;
46582 xi -= (double)xi > op1 ? 1 : 0;
46583 return xi;
46585 machine_mode fmode = GET_MODE (op1);
46586 machine_mode imode = GET_MODE (op0);
46587 rtx ireg, freg, tmp;
46588 rtx_code_label *label;
46590 /* reg = (long)op1 */
46591 ireg = gen_reg_rtx (imode);
46592 expand_fix (ireg, op1, 0);
46594 /* freg = (double)reg */
46595 freg = gen_reg_rtx (fmode);
46596 expand_float (freg, ireg, 0);
46598 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46599 label = ix86_expand_sse_compare_and_jump (UNLE,
46600 freg, op1, !do_floor);
46601 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46602 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46603 emit_move_insn (ireg, tmp);
46605 emit_label (label);
46606 LABEL_NUSES (label) = 1;
46608 emit_move_insn (op0, ireg);
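/* Editorial sketch, not part of GCC: truncation toward zero plus a
   compare-and-adjust gives lfloor; lceil swaps the comparison and the
   sign of the adjustment.  */

static long
lfloor_sketch (double x)
{
  long xi = (long) x;   /* truncates toward zero */
  if ((double) xi > x)  /* negative x with a fractional part */
    xi -= 1;
  return xi;
}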
46611 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46612 result in OPERAND0. */
46613 void
46614 ix86_expand_rint (rtx operand0, rtx operand1)
46616 /* C code for the stuff we're doing below:
46617 xa = fabs (operand1);
46618 if (!isless (xa, 2**52))
46619 return operand1;
46620 xa = xa + 2**52 - 2**52;
46621 return copysign (xa, operand1);
46623 machine_mode mode = GET_MODE (operand0);
46624 rtx res, xa, TWO52, mask;
46625 rtx_code_label *label;
46627 res = gen_reg_rtx (mode);
46628 emit_move_insn (res, operand1);
46630 /* xa = abs (operand1) */
46631 xa = ix86_expand_sse_fabs (res, &mask);
46633 /* if (!isless (xa, TWO52)) goto label; */
46634 TWO52 = ix86_gen_TWO52 (mode);
46635 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46637 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46638 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46640 ix86_sse_copysign_to_positive (res, xa, res, mask);
46642 emit_label (label);
46643 LABEL_NUSES (label) = 1;
46645 emit_move_insn (operand0, res);
46648 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46649 into OPERAND0. */
46650 void
46651 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46653 /* C code for the stuff we expand below.
46654 double xa = fabs (x), x2;
46655 if (!isless (xa, TWO52))
46656 return x;
46657 xa = xa + TWO52 - TWO52;
46658 x2 = copysign (xa, x);
46659 Compensate. Floor:
46660 if (x2 > x)
46661 x2 -= 1;
46662 Compensate. Ceil:
46663 if (x2 < x)
46664 x2 -= -1;
46665 return x2;
46667 machine_mode mode = GET_MODE (operand0);
46668 rtx xa, TWO52, tmp, one, res, mask;
46669 rtx_code_label *label;
46671 TWO52 = ix86_gen_TWO52 (mode);
46673 /* Temporary for holding the result, initialized to the input
46674 operand to ease control flow. */
46675 res = gen_reg_rtx (mode);
46676 emit_move_insn (res, operand1);
46678 /* xa = abs (operand1) */
46679 xa = ix86_expand_sse_fabs (res, &mask);
46681 /* if (!isless (xa, TWO52)) goto label; */
46682 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46684 /* xa = xa + TWO52 - TWO52; */
46685 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46686 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46688 /* xa = copysign (xa, operand1) */
46689 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46691 /* generate 1.0 or -1.0 */
46692 one = force_reg (mode,
46693 const_double_from_real_value (do_floor
46694 ? dconst1 : dconstm1, mode));
46696 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46697 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46698 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46699 /* We always need to subtract here to preserve signed zero. */
46700 tmp = expand_simple_binop (mode, MINUS,
46701 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46702 emit_move_insn (res, tmp);
46704 emit_label (label);
46705 LABEL_NUSES (label) = 1;
46707 emit_move_insn (operand0, res);
46710 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46711 into OPERAND0. */
46712 void
46713 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46715 /* C code for the stuff we expand below.
46716 double xa = fabs (x), x2;
46717 if (!isless (xa, TWO52))
46718 return x;
46719 x2 = (double)(long)x;
46720 Compensate. Floor:
46721 if (x2 > x)
46722 x2 -= 1;
46723 Compensate. Ceil:
46724 if (x2 < x)
46725 x2 += 1;
46726 if (HONOR_SIGNED_ZEROS (mode))
46727 return copysign (x2, x);
46728 return x2;
46730 machine_mode mode = GET_MODE (operand0);
46731 rtx xa, xi, TWO52, tmp, one, res, mask;
46732 rtx_code_label *label;
46734 TWO52 = ix86_gen_TWO52 (mode);
46736 /* Temporary for holding the result, initialized to the input
46737 operand to ease control flow. */
46738 res = gen_reg_rtx (mode);
46739 emit_move_insn (res, operand1);
46741 /* xa = abs (operand1) */
46742 xa = ix86_expand_sse_fabs (res, &mask);
46744 /* if (!isless (xa, TWO52)) goto label; */
46745 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46747 /* xa = (double)(long)x */
46748 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46749 expand_fix (xi, res, 0);
46750 expand_float (xa, xi, 0);
46752 /* generate 1.0 */
46753 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46755 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46756 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46757 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46758 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46759 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46760 emit_move_insn (res, tmp);
46762 if (HONOR_SIGNED_ZEROS (mode))
46763 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46765 emit_label (label);
46766 LABEL_NUSES (label) = 1;
46768 emit_move_insn (operand0, res);
46771 /* Expand SSE sequence for computing round from OPERAND1 storing
46772 into OPERAND0. The sequence works without relying on DImode truncation
46773 via cvttsd2siq, which is only available on 64-bit targets. */
46774 void
46775 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46777 /* C code for the stuff we expand below.
46778 double xa = fabs (x), xa2, x2;
46779 if (!isless (xa, TWO52))
46780 return x;
46781 Using the absolute value and copying back sign makes
46782 -0.0 -> -0.0 correct.
46783 xa2 = xa + TWO52 - TWO52;
46784 Compensate.
46785 dxa = xa2 - xa;
46786 if (dxa <= -0.5)
46787 xa2 += 1;
46788 else if (dxa > 0.5)
46789 xa2 -= 1;
46790 x2 = copysign (xa2, x);
46791 return x2;
46793 machine_mode mode = GET_MODE (operand0);
46794 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46795 rtx_code_label *label;
46797 TWO52 = ix86_gen_TWO52 (mode);
46799 /* Temporary for holding the result, initialized to the input
46800 operand to ease control flow. */
46801 res = gen_reg_rtx (mode);
46802 emit_move_insn (res, operand1);
46804 /* xa = abs (operand1) */
46805 xa = ix86_expand_sse_fabs (res, &mask);
46807 /* if (!isless (xa, TWO52)) goto label; */
46808 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46810 /* xa2 = xa + TWO52 - TWO52; */
46811 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46812 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46814 /* dxa = xa2 - xa; */
46815 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46817 /* generate 0.5, 1.0 and -0.5 */
46818 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46819 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46820 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46821 0, OPTAB_DIRECT);
46823 /* Compensate. */
46824 tmp = gen_reg_rtx (mode);
46825 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46826 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46827 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46828 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46829 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46830 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46831 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46832 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46834 /* res = copysign (xa2, operand1) */
46835 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46837 emit_label (label);
46838 LABEL_NUSES (label) = 1;
46840 emit_move_insn (operand0, res);
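/* Editorial sketch, not part of GCC: the add/subtract of TWO52 rounds
   to nearest-even, so the expander measures the rounding error dxa and
   compensates to obtain round-halfway-away-from-zero.  Assuming
   <math.h>:  */

static double
rounddf_32_sketch (double x)
{
  const double two52 = 4503599627370496.0;
  double xa = fabs (x);
  if (!(xa < two52))
    return x;              /* huge or NaN: already integral */
  volatile double xa2 = xa + two52;
  xa2 -= two52;            /* nearest-even rounding of xa */
  double dxa = xa2 - xa;   /* signed rounding error */
  if (dxa > 0.5)
    xa2 -= 1.0;            /* rounded up by more than half */
  else if (dxa <= -0.5)
    xa2 += 1.0;            /* rounded down by half or more */
  return copysign (xa2, x);
}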
46843 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46844 into OPERAND0. */
46845 void
46846 ix86_expand_trunc (rtx operand0, rtx operand1)
46848 /* C code for the SSE variant we expand below.
46849 double xa = fabs (x), x2;
46850 if (!isless (xa, TWO52))
46851 return x;
46852 x2 = (double)(long)x;
46853 if (HONOR_SIGNED_ZEROS (mode))
46854 return copysign (x2, x);
46855 return x2;
46857 machine_mode mode = GET_MODE (operand0);
46858 rtx xa, xi, TWO52, res, mask;
46859 rtx_code_label *label;
46861 TWO52 = ix86_gen_TWO52 (mode);
46863 /* Temporary for holding the result, initialized to the input
46864 operand to ease control flow. */
46865 res = gen_reg_rtx (mode);
46866 emit_move_insn (res, operand1);
46868 /* xa = abs (operand1) */
46869 xa = ix86_expand_sse_fabs (res, &mask);
46871 /* if (!isless (xa, TWO52)) goto label; */
46872 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46874 /* x = (double)(long)x */
46875 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46876 expand_fix (xi, res, 0);
46877 expand_float (res, xi, 0);
46879 if (HONOR_SIGNED_ZEROS (mode))
46880 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46882 emit_label (label);
46883 LABEL_NUSES (label) = 1;
46885 emit_move_insn (operand0, res);
46888 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46889 into OPERAND0; this variant avoids DImode truncation and also works on 32-bit targets. */
46890 void
46891 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46893 machine_mode mode = GET_MODE (operand0);
46894 rtx xa, mask, TWO52, one, res, smask, tmp;
46895 rtx_code_label *label;
46897 /* C code for the SSE variant we expand below.
46898 double xa = fabs (x), x2;
46899 if (!isless (xa, TWO52))
46900 return x;
46901 xa2 = xa + TWO52 - TWO52;
46902 Compensate:
46903 if (xa2 > xa)
46904 xa2 -= 1.0;
46905 x2 = copysign (xa2, x);
46906 return x2;
46909 TWO52 = ix86_gen_TWO52 (mode);
46911 /* Temporary for holding the result, initialized to the input
46912 operand to ease control flow. */
46913 res = gen_reg_rtx (mode);
46914 emit_move_insn (res, operand1);
46916 /* xa = abs (operand1) */
46917 xa = ix86_expand_sse_fabs (res, &smask);
46919 /* if (!isless (xa, TWO52)) goto label; */
46920 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46922 /* res = xa + TWO52 - TWO52; */
46923 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46924 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46925 emit_move_insn (res, tmp);
46927 /* generate 1.0 */
46928 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46930 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46931 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46932 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
46933 tmp = expand_simple_binop (mode, MINUS,
46934 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46935 emit_move_insn (res, tmp);
46937 /* res = copysign (res, operand1) */
46938 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46940 emit_label (label);
46941 LABEL_NUSES (label) = 1;
46943 emit_move_insn (operand0, res);
46946 /* Expand SSE sequence for computing round from OPERAND1 storing
46947 into OPERAND0. */
46948 void
46949 ix86_expand_round (rtx operand0, rtx operand1)
46951 /* C code for the stuff we're doing below:
46952 double xa = fabs (x);
46953 if (!isless (xa, TWO52))
46954 return x;
46955 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46956 return copysign (xa, x);
46958 machine_mode mode = GET_MODE (operand0);
46959 rtx res, TWO52, xa, xi, half, mask;
46960 rtx_code_label *label;
46961 const struct real_format *fmt;
46962 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46964 /* Temporary for holding the result, initialized to the input
46965 operand to ease control flow. */
46966 res = gen_reg_rtx (mode);
46967 emit_move_insn (res, operand1);
46969 TWO52 = ix86_gen_TWO52 (mode);
46970 xa = ix86_expand_sse_fabs (res, &mask);
46971 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46973 /* load nextafter (0.5, 0.0) */
46974 fmt = REAL_MODE_FORMAT (mode);
46975 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46976 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46978 /* xa = xa + nextafter (0.5, 0.0) */
46979 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46980 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46982 /* xa = (double)(int64_t)xa */
46983 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46984 expand_fix (xi, xa, 0);
46985 expand_float (xa, xi, 0);
46987 /* res = copysign (xa, operand1) */
46988 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46990 emit_label (label);
46991 LABEL_NUSES (label) = 1;
46993 emit_move_insn (operand0, res);
46996 /* Expand SSE sequence for computing round
46997 from OP1 storing into OP0 using the SSE4.1 round insn. */
46998 void
46999 ix86_expand_round_sse4 (rtx op0, rtx op1)
47001 machine_mode mode = GET_MODE (op0);
47002 rtx e1, e2, res, half;
47003 const struct real_format *fmt;
47004 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
47005 rtx (*gen_copysign) (rtx, rtx, rtx);
47006 rtx (*gen_round) (rtx, rtx, rtx);
47008 switch (mode)
47010 case SFmode:
47011 gen_copysign = gen_copysignsf3;
47012 gen_round = gen_sse4_1_roundsf2;
47013 break;
47014 case DFmode:
47015 gen_copysign = gen_copysigndf3;
47016 gen_round = gen_sse4_1_rounddf2;
47017 break;
47018 default:
47019 gcc_unreachable ();
47022 /* round (a) = trunc (a + copysign (0.5, a)) */
47024 /* load nextafter (0.5, 0.0) */
47025 fmt = REAL_MODE_FORMAT (mode);
47026 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
47027 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
47028 half = const_double_from_real_value (pred_half, mode);
47030 /* e1 = copysign (0.5, op1) */
47031 e1 = gen_reg_rtx (mode);
47032 emit_insn (gen_copysign (e1, half, op1));
47034 /* e2 = op1 + e1 */
47035 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
47037 /* res = trunc (e2) */
47038 res = gen_reg_rtx (mode);
47039 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
47041 emit_move_insn (op0, res);
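/* Editorial sketch, not part of GCC: with the SSE4.1 round insn the
   whole expansion collapses to a three-step computation, assuming
   <math.h>:  */

static double
round_sse4_sketch (double a)
{
  double pred_half = nextafter (0.5, 0.0); /* largest double below 0.5 */
  double e1 = copysign (pred_half, a);     /* e1 = copysign (0.5-, a) */
  return trunc (a + e1);                   /* ROUND_TRUNC of a + e1 */
}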
47045 /* Table of valid machine attributes. */
47046 static const struct attribute_spec ix86_attribute_table[] =
47048 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
47049 affects_type_identity } */
47050 /* Stdcall attribute says callee is responsible for popping arguments
47051 if they are not variable. */
47052 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
47053 true },
47054 /* Fastcall attribute says callee is responsible for popping arguments
47055 if they are not variable. */
47056 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
47057 true },
47058 /* Thiscall attribute says callee is responsible for popping arguments
47059 if they are not variable. */
47060 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
47061 true },
47062 /* Cdecl attribute says the callee is a normal C declaration */
47063 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
47064 true },
47065 /* Regparm attribute specifies how many integer arguments are to be
47066 passed in registers. */
47067 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
47068 true },
47069 /* Sseregparm attribute says we are using x86_64 calling conventions
47070 for FP arguments. */
47071 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
47072 true },
47073 /* The transactional memory builtins are implicitly regparm or fastcall
47074 depending on the ABI. Override the generic do-nothing attribute that
47075 these builtins were declared with. */
47076 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
47077 true },
47078 /* force_align_arg_pointer says this function realigns the stack at entry. */
47079 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
47080 false, true, true, ix86_handle_cconv_attribute, false },
47081 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47082 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
47083 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
47084 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
47085 false },
47086 #endif
47087 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
47088 false },
47089 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
47090 false },
47091 #ifdef SUBTARGET_ATTRIBUTE_TABLE
47092 SUBTARGET_ATTRIBUTE_TABLE,
47093 #endif
47094 /* ms_abi and sysv_abi calling convention function attributes. */
47095 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
47096 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
47097 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
47098 false },
47099 { "callee_pop_aggregate_return", 1, 1, false, true, true,
47100 ix86_handle_callee_pop_aggregate_return, true },
47101 /* End element. */
47102 { NULL, 0, 0, false, false, false, NULL, false }
47105 /* Implement targetm.vectorize.builtin_vectorization_cost. */
47106 static int
47107 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
47108 tree vectype, int)
47110 unsigned elements;
47112 switch (type_of_cost)
47114 case scalar_stmt:
47115 return ix86_cost->scalar_stmt_cost;
47117 case scalar_load:
47118 return ix86_cost->scalar_load_cost;
47120 case scalar_store:
47121 return ix86_cost->scalar_store_cost;
47123 case vector_stmt:
47124 return ix86_cost->vec_stmt_cost;
47126 case vector_load:
47127 return ix86_cost->vec_align_load_cost;
47129 case vector_store:
47130 return ix86_cost->vec_store_cost;
47132 case vec_to_scalar:
47133 return ix86_cost->vec_to_scalar_cost;
47135 case scalar_to_vec:
47136 return ix86_cost->scalar_to_vec_cost;
47138 case unaligned_load:
47139 case unaligned_store:
47140 return ix86_cost->vec_unalign_load_cost;
47142 case cond_branch_taken:
47143 return ix86_cost->cond_taken_branch_cost;
47145 case cond_branch_not_taken:
47146 return ix86_cost->cond_not_taken_branch_cost;
47148 case vec_perm:
47149 case vec_promote_demote:
47150 return ix86_cost->vec_stmt_cost;
47152 case vec_construct:
47153 elements = TYPE_VECTOR_SUBPARTS (vectype);
47154 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
47156 default:
47157 gcc_unreachable ();
47161 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
47162 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
47163 insn every time. */
47165 static GTY(()) rtx_insn *vselect_insn;
47167 /* Initialize vselect_insn. */
47169 static void
47170 init_vselect_insn (void)
47172 unsigned i;
47173 rtx x;
47175 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
47176 for (i = 0; i < MAX_VECT_LEN; ++i)
47177 XVECEXP (x, 0, i) = const0_rtx;
47178 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
47179 const0_rtx), x);
47180 x = gen_rtx_SET (const0_rtx, x);
47181 start_sequence ();
47182 vselect_insn = emit_insn (x);
47183 end_sequence ();
47186 /* Construct (set target (vec_select op0 (parallel perm))) and
47187 return true if that's a valid instruction in the active ISA. */
47189 static bool
47190 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
47191 unsigned nelt, bool testing_p)
47193 unsigned int i;
47194 rtx x, save_vconcat;
47195 int icode;
47197 if (vselect_insn == NULL_RTX)
47198 init_vselect_insn ();
47200 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
47201 PUT_NUM_ELEM (XVEC (x, 0), nelt);
47202 for (i = 0; i < nelt; ++i)
47203 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
47204 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47205 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
47206 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
47207 SET_DEST (PATTERN (vselect_insn)) = target;
47208 icode = recog_memoized (vselect_insn);
47210 if (icode >= 0 && !testing_p)
47211 emit_insn (copy_rtx (PATTERN (vselect_insn)));
47213 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
47214 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
47215 INSN_CODE (vselect_insn) = -1;
47217 return icode >= 0;
47220 /* Similar, but generate a vec_concat from op0 and op1 as well. */
47222 static bool
47223 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
47224 const unsigned char *perm, unsigned nelt,
47225 bool testing_p)
47227 machine_mode v2mode;
47228 rtx x;
47229 bool ok;
47231 if (vselect_insn == NULL_RTX)
47232 init_vselect_insn ();
47234 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
47235 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47236 PUT_MODE (x, v2mode);
47237 XEXP (x, 0) = op0;
47238 XEXP (x, 1) = op1;
47239 ok = expand_vselect (target, x, perm, nelt, testing_p);
47240 XEXP (x, 0) = const0_rtx;
47241 XEXP (x, 1) = const0_rtx;
47242 return ok;
47245 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47246 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
47248 static bool
47249 expand_vec_perm_blend (struct expand_vec_perm_d *d)
47251 machine_mode mmode, vmode = d->vmode;
47252 unsigned i, mask, nelt = d->nelt;
47253 rtx target, op0, op1, maskop, x;
47254 rtx rperm[32], vperm;
47256 if (d->one_operand_p)
47257 return false;
47258 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
47259 && (TARGET_AVX512BW
47260 || GET_MODE_UNIT_SIZE (vmode) >= 4))
47262 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47264 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47266 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47268 else
47269 return false;
47271 /* This is a blend, not a permute. Elements must stay in their
47272 respective lanes. */
47273 for (i = 0; i < nelt; ++i)
47275 unsigned e = d->perm[i];
47276 if (!(e == i || e == i + nelt))
47277 return false;
47280 if (d->testing_p)
47281 return true;
47283 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
47284 decision should be extracted elsewhere, so that we only try that
47285 sequence once all budget==3 options have been tried. */
47286 target = d->target;
47287 op0 = d->op0;
47288 op1 = d->op1;
47289 mask = 0;
47291 switch (vmode)
47293 case V8DFmode:
47294 case V16SFmode:
47295 case V4DFmode:
47296 case V8SFmode:
47297 case V2DFmode:
47298 case V4SFmode:
47299 case V8HImode:
47300 case V8SImode:
47301 case V32HImode:
47302 case V64QImode:
47303 case V16SImode:
47304 case V8DImode:
47305 for (i = 0; i < nelt; ++i)
47306 mask |= (d->perm[i] >= nelt) << i;
47307 break;
47309 case V2DImode:
47310 for (i = 0; i < 2; ++i)
47311 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
47312 vmode = V8HImode;
47313 goto do_subreg;
47315 case V4SImode:
47316 for (i = 0; i < 4; ++i)
47317 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47318 vmode = V8HImode;
47319 goto do_subreg;
47321 case V16QImode:
47322 /* See if bytes move in pairs so we can use pblendw with
47323 an immediate argument, rather than pblendvb with a vector
47324 argument. */
47325 for (i = 0; i < 16; i += 2)
47326 if (d->perm[i] + 1 != d->perm[i + 1])
47328 use_pblendvb:
47329 for (i = 0; i < nelt; ++i)
47330 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
47332 finish_pblendvb:
47333 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
47334 vperm = force_reg (vmode, vperm);
47336 if (GET_MODE_SIZE (vmode) == 16)
47337 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
47338 else
47339 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
47340 if (target != d->target)
47341 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47342 return true;
47345 for (i = 0; i < 8; ++i)
47346 mask |= (d->perm[i * 2] >= 16) << i;
47347 vmode = V8HImode;
47348 /* FALLTHRU */
47350 do_subreg:
47351 target = gen_reg_rtx (vmode);
47352 op0 = gen_lowpart (vmode, op0);
47353 op1 = gen_lowpart (vmode, op1);
47354 break;
47356 case V32QImode:
47357 /* See if bytes move in pairs. If not, vpblendvb must be used. */
47358 for (i = 0; i < 32; i += 2)
47359 if (d->perm[i] + 1 != d->perm[i + 1])
47360 goto use_pblendvb;
47361 /* See if bytes move in quadruplets. If yes, vpblendd
47362 with immediate can be used. */
47363 for (i = 0; i < 32; i += 4)
47364 if (d->perm[i] + 2 != d->perm[i + 2])
47365 break;
47366 if (i < 32)
47368 /* See if bytes move the same in both lanes. If yes,
47369 vpblendw with immediate can be used. */
47370 for (i = 0; i < 16; i += 2)
47371 if (d->perm[i] + 16 != d->perm[i + 16])
47372 goto use_pblendvb;
47374 /* Use vpblendw. */
47375 for (i = 0; i < 16; ++i)
47376 mask |= (d->perm[i * 2] >= 32) << i;
47377 vmode = V16HImode;
47378 goto do_subreg;
47381 /* Use vpblendd. */
47382 for (i = 0; i < 8; ++i)
47383 mask |= (d->perm[i * 4] >= 32) << i;
47384 vmode = V8SImode;
47385 goto do_subreg;
47387 case V16HImode:
47388 /* See if words move in pairs. If yes, vpblendd can be used. */
47389 for (i = 0; i < 16; i += 2)
47390 if (d->perm[i] + 1 != d->perm[i + 1])
47391 break;
47392 if (i < 16)
47394 /* See if words move the same in both lanes. If not,
47395 vpblendvb must be used. */
47396 for (i = 0; i < 8; i++)
47397 if (d->perm[i] + 8 != d->perm[i + 8])
47399 /* Use vpblendvb. */
47400 for (i = 0; i < 32; ++i)
47401 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47403 vmode = V32QImode;
47404 nelt = 32;
47405 target = gen_reg_rtx (vmode);
47406 op0 = gen_lowpart (vmode, op0);
47407 op1 = gen_lowpart (vmode, op1);
47408 goto finish_pblendvb;
47411 /* Use vpblendw. */
47412 for (i = 0; i < 16; ++i)
47413 mask |= (d->perm[i] >= 16) << i;
47414 break;
47417 /* Use vpblendd. */
47418 for (i = 0; i < 8; ++i)
47419 mask |= (d->perm[i * 2] >= 16) << i;
47420 vmode = V8SImode;
47421 goto do_subreg;
47423 case V4DImode:
47424 /* Use vpblendd. */
47425 for (i = 0; i < 4; ++i)
47426 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47427 vmode = V8SImode;
47428 goto do_subreg;
47430 default:
47431 gcc_unreachable ();
47434 switch (vmode)
47436 case V8DFmode:
47437 case V8DImode:
47438 mmode = QImode;
47439 break;
47440 case V16SFmode:
47441 case V16SImode:
47442 mmode = HImode;
47443 break;
47444 case V32HImode:
47445 mmode = SImode;
47446 break;
47447 case V64QImode:
47448 mmode = DImode;
47449 break;
47450 default:
47451 mmode = VOIDmode;
47454 if (mmode != VOIDmode)
47455 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47456 else
47457 maskop = GEN_INT (mask);
47459 /* This matches five different patterns, depending on the mode. */
47460 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47461 x = gen_rtx_SET (target, x);
47462 emit_insn (x);
47463 if (target != d->target)
47464 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47466 return true;
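/* Editorial sketch, not part of GCC: for the immediate-mask forms the
   blend mask simply sets bit I when element I comes from the second
   operand.  E.g. a V4SF permutation {0, 5, 2, 7} yields mask 0b1010.  */

static unsigned
blend_mask_sketch (const unsigned char *perm, unsigned nelt)
{
  unsigned i, mask = 0;
  for (i = 0; i < nelt; ++i)
    mask |= (unsigned) (perm[i] >= nelt) << i;
  return mask;
}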
47469 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47470 in terms of the variable form of vpermilps.
47472 Note that we will have already failed the immediate input vpermilps,
47473 which requires that the high and low part shuffle be identical; the
47474 variable form doesn't require that. */
47476 static bool
47477 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47479 rtx rperm[8], vperm;
47480 unsigned i;
47482 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47483 return false;
47485 /* We can only permute within each 128-bit lane. */
47486 for (i = 0; i < 8; ++i)
47488 unsigned e = d->perm[i];
47489 if (i < 4 ? e >= 4 : e < 4)
47490 return false;
47493 if (d->testing_p)
47494 return true;
47496 for (i = 0; i < 8; ++i)
47498 unsigned e = d->perm[i];
47500 /* Within each 128-bit lane, the elements of op0 are numbered
47501 from 0 and the elements of op1 are numbered from 4. */
47502 if (e >= 8 + 4)
47503 e -= 8;
47504 else if (e >= 4)
47505 e -= 4;
47507 rperm[i] = GEN_INT (e);
47510 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47511 vperm = force_reg (V8SImode, vperm);
47512 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47514 return true;
47517 /* Return true if permutation D can be performed as VMODE permutation
47518 instead. */
47520 static bool
47521 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47523 unsigned int i, j, chunk;
47525 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47526 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47527 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47528 return false;
47530 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47531 return true;
47533 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47534 for (i = 0; i < d->nelt; i += chunk)
47535 if (d->perm[i] & (chunk - 1))
47536 return false;
47537 else
47538 for (j = 1; j < chunk; ++j)
47539 if (d->perm[i] + j != d->perm[i + j])
47540 return false;
47542 return true;
47545 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47546 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47548 static bool
47549 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47551 unsigned i, nelt, eltsz, mask;
47552 unsigned char perm[64];
47553 machine_mode vmode = V16QImode;
47554 rtx rperm[64], vperm, target, op0, op1;
47556 nelt = d->nelt;
47558 if (!d->one_operand_p)
47560 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47562 if (TARGET_AVX2
47563 && valid_perm_using_mode_p (V2TImode, d))
47565 if (d->testing_p)
47566 return true;
47568 /* Use vperm2i128 insn. The pattern uses
47569 V4DImode instead of V2TImode. */
47570 target = d->target;
47571 if (d->vmode != V4DImode)
47572 target = gen_reg_rtx (V4DImode);
47573 op0 = gen_lowpart (V4DImode, d->op0);
47574 op1 = gen_lowpart (V4DImode, d->op1);
47575 rperm[0]
47576 = GEN_INT ((d->perm[0] / (nelt / 2))
47577 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47578 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47579 if (target != d->target)
47580 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47581 return true;
47583 return false;
47586 else
47588 if (GET_MODE_SIZE (d->vmode) == 16)
47590 if (!TARGET_SSSE3)
47591 return false;
47593 else if (GET_MODE_SIZE (d->vmode) == 32)
47595 if (!TARGET_AVX2)
47596 return false;
47598 /* V4DImode should already have been handled through
47599 expand_vselect by the vpermq instruction. */
47600 gcc_assert (d->vmode != V4DImode);
47602 vmode = V32QImode;
47603 if (d->vmode == V8SImode
47604 || d->vmode == V16HImode
47605 || d->vmode == V32QImode)
47607 /* First see if vpermq can be used for
47608 V8SImode/V16HImode/V32QImode. */
47609 if (valid_perm_using_mode_p (V4DImode, d))
47611 for (i = 0; i < 4; i++)
47612 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47613 if (d->testing_p)
47614 return true;
47615 target = gen_reg_rtx (V4DImode);
47616 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47617 perm, 4, false))
47619 emit_move_insn (d->target,
47620 gen_lowpart (d->vmode, target));
47621 return true;
47623 return false;
47626 /* Next see if vpermd can be used. */
47627 if (valid_perm_using_mode_p (V8SImode, d))
47628 vmode = V8SImode;
47630 /* Or if vpermps can be used. */
47631 else if (d->vmode == V8SFmode)
47632 vmode = V8SImode;
47634 if (vmode == V32QImode)
47636 /* vpshufb only works within lanes; it cannot
47637 shuffle bytes across lanes. */
47638 for (i = 0; i < nelt; ++i)
47639 if ((d->perm[i] ^ i) & (nelt / 2))
47640 return false;
47643 else if (GET_MODE_SIZE (d->vmode) == 64)
47645 if (!TARGET_AVX512BW)
47646 return false;
47648 /* If vpermq didn't work, vpshufb won't work either. */
47649 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47650 return false;
47652 vmode = V64QImode;
47653 if (d->vmode == V16SImode
47654 || d->vmode == V32HImode
47655 || d->vmode == V64QImode)
47657 /* First see if vpermq can be used for
47658 V16SImode/V32HImode/V64QImode. */
47659 if (valid_perm_using_mode_p (V8DImode, d))
47661 for (i = 0; i < 8; i++)
47662 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47663 if (d->testing_p)
47664 return true;
47665 target = gen_reg_rtx (V8DImode);
47666 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47667 perm, 8, false))
47669 emit_move_insn (d->target,
47670 gen_lowpart (d->vmode, target));
47671 return true;
47673 return false;
47676 /* Next see if vpermd can be used. */
47677 if (valid_perm_using_mode_p (V16SImode, d))
47678 vmode = V16SImode;
47680 /* Or if vpermps can be used. */
47681 else if (d->vmode == V16SFmode)
47682 vmode = V16SImode;
47683 if (vmode == V64QImode)
47685 /* vpshufb only works within lanes; it cannot
47686 shuffle bytes across lanes. */
47687 for (i = 0; i < nelt; ++i)
47688 if ((d->perm[i] ^ i) & (nelt / 4))
47689 return false;
47692 else
47693 return false;
47696 if (d->testing_p)
47697 return true;
47699 if (vmode == V8SImode)
47700 for (i = 0; i < 8; ++i)
47701 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47702 else if (vmode == V16SImode)
47703 for (i = 0; i < 16; ++i)
47704 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47705 else
47707 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
47708 if (!d->one_operand_p)
47709 mask = 2 * nelt - 1;
47710 else if (vmode == V16QImode)
47711 mask = nelt - 1;
47712 else if (vmode == V64QImode)
47713 mask = nelt / 4 - 1;
47714 else
47715 mask = nelt / 2 - 1;
47717 for (i = 0; i < nelt; ++i)
47719 unsigned j, e = d->perm[i] & mask;
47720 for (j = 0; j < eltsz; ++j)
47721 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47725 vperm = gen_rtx_CONST_VECTOR (vmode,
47726 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47727 vperm = force_reg (vmode, vperm);
47729 target = d->target;
47730 if (d->vmode != vmode)
47731 target = gen_reg_rtx (vmode);
47732 op0 = gen_lowpart (vmode, d->op0);
47733 if (d->one_operand_p)
47735 if (vmode == V16QImode)
47736 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47737 else if (vmode == V32QImode)
47738 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47739 else if (vmode == V64QImode)
47740 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47741 else if (vmode == V8SFmode)
47742 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47743 else if (vmode == V8SImode)
47744 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47745 else if (vmode == V16SFmode)
47746 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47747 else if (vmode == V16SImode)
47748 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47749 else
47750 gcc_unreachable ();
47752 else
47754 op1 = gen_lowpart (vmode, d->op1);
47755 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47757 if (target != d->target)
47758 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47760 return true;
47763 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47764 in a single instruction. */
47766 static bool
47767 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47769 unsigned i, nelt = d->nelt;
47770 unsigned char perm2[MAX_VECT_LEN];
47772 /* Check plain VEC_SELECT first, because AVX has instructions that could
47773 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47774 input where SEL+CONCAT may not. */
47775 if (d->one_operand_p)
47777 int mask = nelt - 1;
47778 bool identity_perm = true;
47779 bool broadcast_perm = true;
47781 for (i = 0; i < nelt; i++)
47783 perm2[i] = d->perm[i] & mask;
47784 if (perm2[i] != i)
47785 identity_perm = false;
47786 if (perm2[i])
47787 broadcast_perm = false;
47790 if (identity_perm)
47792 if (!d->testing_p)
47793 emit_move_insn (d->target, d->op0);
47794 return true;
47796 else if (broadcast_perm && TARGET_AVX2)
47798 /* Use vpbroadcast{b,w,d}. */
47799 rtx (*gen) (rtx, rtx) = NULL;
47800 switch (d->vmode)
47802 case V64QImode:
47803 if (TARGET_AVX512BW)
47804 gen = gen_avx512bw_vec_dupv64qi_1;
47805 break;
47806 case V32QImode:
47807 gen = gen_avx2_pbroadcastv32qi_1;
47808 break;
47809 case V32HImode:
47810 if (TARGET_AVX512BW)
47811 gen = gen_avx512bw_vec_dupv32hi_1;
47812 break;
47813 case V16HImode:
47814 gen = gen_avx2_pbroadcastv16hi_1;
47815 break;
47816 case V16SImode:
47817 if (TARGET_AVX512F)
47818 gen = gen_avx512f_vec_dupv16si_1;
47819 break;
47820 case V8SImode:
47821 gen = gen_avx2_pbroadcastv8si_1;
47822 break;
47823 case V16QImode:
47824 gen = gen_avx2_pbroadcastv16qi;
47825 break;
47826 case V8HImode:
47827 gen = gen_avx2_pbroadcastv8hi;
47828 break;
47829 case V16SFmode:
47830 if (TARGET_AVX512F)
47831 gen = gen_avx512f_vec_dupv16sf_1;
47832 break;
47833 case V8SFmode:
47834 gen = gen_avx2_vec_dupv8sf_1;
47835 break;
47836 case V8DFmode:
47837 if (TARGET_AVX512F)
47838 gen = gen_avx512f_vec_dupv8df_1;
47839 break;
47840 case V8DImode:
47841 if (TARGET_AVX512F)
47842 gen = gen_avx512f_vec_dupv8di_1;
47843 break;
47844 /* For other modes, prefer the other shuffles this function creates. */
47845 default: break;
47847 if (gen != NULL)
47849 if (!d->testing_p)
47850 emit_insn (gen (d->target, d->op0));
47851 return true;
47855 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47856 return true;
47858 /* There are plenty of patterns in sse.md that are written for
47859 SEL+CONCAT and are not replicated for a single op. Perhaps
47860 that should be changed, to avoid the nastiness here. */
47862 /* Recognize interleave style patterns, which means incrementing
47863 every other permutation operand. */
47864 for (i = 0; i < nelt; i += 2)
47866 perm2[i] = d->perm[i] & mask;
47867 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47869 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47870 d->testing_p))
47871 return true;
47873 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47874 if (nelt >= 4)
47876 for (i = 0; i < nelt; i += 4)
47878 perm2[i + 0] = d->perm[i + 0] & mask;
47879 perm2[i + 1] = d->perm[i + 1] & mask;
47880 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47881 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47884 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47885 d->testing_p))
47886 return true;
47890 /* Finally, try the fully general two operand permute. */
47891 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47892 d->testing_p))
47893 return true;
47895 /* Recognize interleave style patterns with reversed operands. */
47896 if (!d->one_operand_p)
47898 for (i = 0; i < nelt; ++i)
47900 unsigned e = d->perm[i];
47901 if (e >= nelt)
47902 e -= nelt;
47903 else
47904 e += nelt;
47905 perm2[i] = e;
47908 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47909 d->testing_p))
47910 return true;
47913 /* Try the SSE4.1 blend variable merge instructions. */
47914 if (expand_vec_perm_blend (d))
47915 return true;
47917 /* Try one of the AVX vpermil variable permutations. */
47918 if (expand_vec_perm_vpermil (d))
47919 return true;
47921 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47922 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47923 if (expand_vec_perm_pshufb (d))
47924 return true;
47926 /* Try the AVX2 vpalignr instruction. */
47927 if (expand_vec_perm_palignr (d, true))
47928 return true;
47930 /* Try the AVX512F vpermi2 instructions. */
47931 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47932 return true;
47934 return false;
47937 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47938 in terms of a pair of pshuflw + pshufhw instructions. */
47940 static bool
47941 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47943 unsigned char perm2[MAX_VECT_LEN];
47944 unsigned i;
47945 bool ok;
47947 if (d->vmode != V8HImode || !d->one_operand_p)
47948 return false;
47950 /* The two permutations only operate in 64-bit lanes. */
47951 for (i = 0; i < 4; ++i)
47952 if (d->perm[i] >= 4)
47953 return false;
47954 for (i = 4; i < 8; ++i)
47955 if (d->perm[i] < 4)
47956 return false;
47958 if (d->testing_p)
47959 return true;
47961 /* Emit the pshuflw. */
47962 memcpy (perm2, d->perm, 4);
47963 for (i = 4; i < 8; ++i)
47964 perm2[i] = i;
47965 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47966 gcc_assert (ok);
47968 /* Emit the pshufhw. */
47969 memcpy (perm2 + 4, d->perm + 4, 4);
47970 for (i = 0; i < 4; ++i)
47971 perm2[i] = i;
47972 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47973 gcc_assert (ok);
47975 return true;
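/* Editorial sketch, not part of GCC: pshuflw rewrites words 0-3 and
   copies words 4-7 unchanged; pshufhw is the mirror image.  Any V8HI
   permutation that keeps the two 64-bit halves separate therefore
   decomposes into the pair, modeled here assuming <string.h>.  */

static void
pshuflw_pshufhw_sketch (unsigned short v[8], const unsigned char perm[8])
{
  unsigned short t[8];
  unsigned i;
  for (i = 0; i < 4; ++i)
    t[i] = v[perm[i]];  /* pshuflw: perm[0..3] < 4 */
  for (i = 4; i < 8; ++i)
    t[i] = v[perm[i]];  /* pshufhw: perm[4..7] >= 4 */
  memcpy (v, t, sizeof t);
}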
47978 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47979 the permutation using the SSSE3 palignr instruction. This succeeds
47980 when all of the elements in PERM fit within one vector and we merely
47981 need to shift them down so that a single vector permutation has a
47982 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
47983 the vpalignr instruction itself can perform the requested permutation. */
47985 static bool
47986 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47988 unsigned i, nelt = d->nelt;
47989 unsigned min, max, minswap, maxswap;
47990 bool in_order, ok, swap = false;
47991 rtx shift, target;
47992 struct expand_vec_perm_d dcopy;
47994 /* Even with AVX, palignr only operates on 128-bit vectors;
47995 with AVX2, palignr operates on both 128-bit lanes in parallel. */
47996 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47997 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47998 return false;
48000 min = 2 * nelt;
48001 max = 0;
48002 minswap = 2 * nelt;
48003 maxswap = 0;
48004 for (i = 0; i < nelt; ++i)
48006 unsigned e = d->perm[i];
48007 unsigned eswap = d->perm[i] ^ nelt;
48008 if (GET_MODE_SIZE (d->vmode) == 32)
48010 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
48011 eswap = e ^ (nelt / 2);
48013 if (e < min)
48014 min = e;
48015 if (e > max)
48016 max = e;
48017 if (eswap < minswap)
48018 minswap = eswap;
48019 if (eswap > maxswap)
48020 maxswap = eswap;
48022 if (min == 0
48023 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
48025 if (d->one_operand_p
48026 || minswap == 0
48027 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
48028 ? nelt / 2 : nelt))
48029 return false;
48030 swap = true;
48031 min = minswap;
48032 max = maxswap;
48035 /* Given that we have SSSE3, we know we'll be able to implement the
48036 single operand permutation after the palignr with pshufb for
48037 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
48038 first. */
48039 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
48040 return true;
48042 dcopy = *d;
48043 if (swap)
48045 dcopy.op0 = d->op1;
48046 dcopy.op1 = d->op0;
48047 for (i = 0; i < nelt; ++i)
48048 dcopy.perm[i] ^= nelt;
48051 in_order = true;
48052 for (i = 0; i < nelt; ++i)
48054 unsigned e = dcopy.perm[i];
48055 if (GET_MODE_SIZE (d->vmode) == 32
48056 && e >= nelt
48057 && (e & (nelt / 2 - 1)) < min)
48058 e = e - min - (nelt / 2);
48059 else
48060 e = e - min;
48061 if (e != i)
48062 in_order = false;
48063 dcopy.perm[i] = e;
48065 dcopy.one_operand_p = true;
48067 if (single_insn_only_p && !in_order)
48068 return false;
48070 /* For AVX2, test whether we can permute the result in one instruction. */
48071 if (d->testing_p)
48073 if (in_order)
48074 return true;
48075 dcopy.op1 = dcopy.op0;
48076 return expand_vec_perm_1 (&dcopy);
48079 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
48080 if (GET_MODE_SIZE (d->vmode) == 16)
48082 target = gen_reg_rtx (TImode);
48083 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
48084 gen_lowpart (TImode, dcopy.op0), shift));
48086 else
48088 target = gen_reg_rtx (V2TImode);
48089 emit_insn (gen_avx2_palignrv2ti (target,
48090 gen_lowpart (V2TImode, dcopy.op1),
48091 gen_lowpart (V2TImode, dcopy.op0),
48092 shift));
48095 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
48097 /* Test for the degenerate case where the alignment by itself
48098 produces the desired permutation. */
48099 if (in_order)
48101 emit_move_insn (d->target, dcopy.op0);
48102 return true;
48105 ok = expand_vec_perm_1 (&dcopy);
48106 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
48108 return ok;
48111 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48112 the permutation using the SSE4_1 pblendv instruction. Potentially
48113 reduces the permutation from 2 pshufb and an ior to 1 pshufb and 1 pblendv. */
48115 static bool
48116 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
48118 unsigned i, which, nelt = d->nelt;
48119 struct expand_vec_perm_d dcopy, dcopy1;
48120 machine_mode vmode = d->vmode;
48121 bool ok;
48123 /* Use the same checks as in expand_vec_perm_blend. */
48124 if (d->one_operand_p)
48125 return false;
48126 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48128 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48130 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48132 else
48133 return false;
48135 /* Figure out which permutation elements do not stay in their
48136 respective lanes. */
48137 for (i = 0, which = 0; i < nelt; ++i)
48139 unsigned e = d->perm[i];
48140 if (e != i)
48141 which |= (e < nelt ? 1 : 2);
48143 /* We can pblend the part where elements do not stay in their
48144 respective lanes only when these elements are all in one
48145 half of a permutation.
48146 {0 1 8 3 4 5 9 7} is ok as 8, 9 are not in their respective
48147 lanes, but both 8 and 9 >= 8
48148 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not in their
48149 respective lanes and 8 >= 8, but 2 is not. */
48150 if (which != 1 && which != 2)
48151 return false;
48152 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
48153 return true;
48155 /* First we apply a one-operand permutation to the part whose
48156 elements do not stay in their respective lanes. */
48157 dcopy = *d;
48158 if (which == 2)
48159 dcopy.op0 = dcopy.op1 = d->op1;
48160 else
48161 dcopy.op0 = dcopy.op1 = d->op0;
48162 if (!d->testing_p)
48163 dcopy.target = gen_reg_rtx (vmode);
48164 dcopy.one_operand_p = true;
48166 for (i = 0; i < nelt; ++i)
48167 dcopy.perm[i] = d->perm[i] & (nelt - 1);
48169 ok = expand_vec_perm_1 (&dcopy);
48170 if (GET_MODE_SIZE (vmode) != 16 && !ok)
48171 return false;
48172 else
48173 gcc_assert (ok);
48174 if (d->testing_p)
48175 return true;
48177 /* Next we put permuted elements into their positions. */
48178 dcopy1 = *d;
48179 if (which == 2)
48180 dcopy1.op1 = dcopy.target;
48181 else
48182 dcopy1.op0 = dcopy.target;
48184 for (i = 0; i < nelt; ++i)
48185 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
48187 ok = expand_vec_perm_blend (&dcopy1);
48188 gcc_assert (ok);
48190 return true;
48193 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
48195 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48196 a two vector permutation into a single vector permutation by using
48197 an interleave operation to merge the vectors. */
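/* Worked example: for V8HImode perm { 0 1 8 9 2 3 10 11 } every index
   refers to the low half of one of the operands, so dremap becomes
   punpcklwd, producing { 0 8 1 9 2 10 3 11 }, and dfinal reorders that
   with the one-operand shuffle { 0 2 1 3 4 6 5 7 }. */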
48199 static bool
48200 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
48202 struct expand_vec_perm_d dremap, dfinal;
48203 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
48204 unsigned HOST_WIDE_INT contents;
48205 unsigned char remap[2 * MAX_VECT_LEN];
48206 rtx_insn *seq;
48207 bool ok, same_halves = false;
48209 if (GET_MODE_SIZE (d->vmode) == 16)
48211 if (d->one_operand_p)
48212 return false;
48214 else if (GET_MODE_SIZE (d->vmode) == 32)
48216 if (!TARGET_AVX)
48217 return false;
48218 /* For 32-byte modes allow even d->one_operand_p.
48219 The lack of cross-lane shuffling in some instructions
48220 might prevent a single insn shuffle. */
48221 dfinal = *d;
48222 dfinal.testing_p = true;
48223 /* If expand_vec_perm_interleave3 can expand this into
48224 a 3 insn sequence, give up and let it be expanded as
48225 a 3 insn sequence. While that is one insn longer,
48226 it doesn't need a memory operand, and in the common
48227 case where both the interleave low and high permutations
48228 of the same operands are adjacent it needs only 4 insns
48229 for both after CSE. */
48230 if (expand_vec_perm_interleave3 (&dfinal))
48231 return false;
48233 else
48234 return false;
48236 /* Examine whence the elements come. */
48237 contents = 0;
48238 for (i = 0; i < nelt; ++i)
48239 contents |= HOST_WIDE_INT_1U << d->perm[i];
48241 memset (remap, 0xff, sizeof (remap));
48242 dremap = *d;
48244 if (GET_MODE_SIZE (d->vmode) == 16)
48246 unsigned HOST_WIDE_INT h1, h2, h3, h4;
48248 /* Split the two input vectors into 4 halves. */
48249 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
48250 h2 = h1 << nelt2;
48251 h3 = h2 << nelt2;
48252 h4 = h3 << nelt2;
48254 /* If the elements come from the low halves, use interleave low; similarly
48255 for interleave high. If the elements are from mis-matched halves, we
48256 can use shufps for V4SF/V4SI or do a DImode shuffle. */
48257 if ((contents & (h1 | h3)) == contents)
48259 /* punpckl* */
48260 for (i = 0; i < nelt2; ++i)
48262 remap[i] = i * 2;
48263 remap[i + nelt] = i * 2 + 1;
48264 dremap.perm[i * 2] = i;
48265 dremap.perm[i * 2 + 1] = i + nelt;
48267 if (!TARGET_SSE2 && d->vmode == V4SImode)
48268 dremap.vmode = V4SFmode;
48270 else if ((contents & (h2 | h4)) == contents)
48272 /* punpckh* */
48273 for (i = 0; i < nelt2; ++i)
48275 remap[i + nelt2] = i * 2;
48276 remap[i + nelt + nelt2] = i * 2 + 1;
48277 dremap.perm[i * 2] = i + nelt2;
48278 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
48280 if (!TARGET_SSE2 && d->vmode == V4SImode)
48281 dremap.vmode = V4SFmode;
48283 else if ((contents & (h1 | h4)) == contents)
48285 /* shufps */
48286 for (i = 0; i < nelt2; ++i)
48288 remap[i] = i;
48289 remap[i + nelt + nelt2] = i + nelt2;
48290 dremap.perm[i] = i;
48291 dremap.perm[i + nelt2] = i + nelt + nelt2;
48293 if (nelt != 4)
48295 /* shufpd */
48296 dremap.vmode = V2DImode;
48297 dremap.nelt = 2;
48298 dremap.perm[0] = 0;
48299 dremap.perm[1] = 3;
48302 else if ((contents & (h2 | h3)) == contents)
48304 /* shufps */
48305 for (i = 0; i < nelt2; ++i)
48307 remap[i + nelt2] = i;
48308 remap[i + nelt] = i + nelt2;
48309 dremap.perm[i] = i + nelt2;
48310 dremap.perm[i + nelt2] = i + nelt;
48312 if (nelt != 4)
48314 /* shufpd */
48315 dremap.vmode = V2DImode;
48316 dremap.nelt = 2;
48317 dremap.perm[0] = 1;
48318 dremap.perm[1] = 2;
48321 else
48322 return false;
48324 else
48326 unsigned int nelt4 = nelt / 4, nzcnt = 0;
48327 unsigned HOST_WIDE_INT q[8];
48328 unsigned int nonzero_halves[4];
48330 /* Split the two input vectors into 8 quarters. */
48331 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
48332 for (i = 1; i < 8; ++i)
48333 q[i] = q[0] << (nelt4 * i);
48334 for (i = 0; i < 4; ++i)
48335 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
48337 nonzero_halves[nzcnt] = i;
48338 ++nzcnt;
48341 if (nzcnt == 1)
48343 gcc_assert (d->one_operand_p);
48344 nonzero_halves[1] = nonzero_halves[0];
48345 same_halves = true;
48347 else if (d->one_operand_p)
48349 gcc_assert (nonzero_halves[0] == 0);
48350 gcc_assert (nonzero_halves[1] == 1);
48353 if (nzcnt <= 2)
48355 if (d->perm[0] / nelt2 == nonzero_halves[1])
48357 /* Attempt to increase the likelihood that dfinal
48358 shuffle will be intra-lane. */
48359 std::swap (nonzero_halves[0], nonzero_halves[1]);
48362 /* vperm2f128 or vperm2i128. */
48363 for (i = 0; i < nelt2; ++i)
48365 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
48366 remap[i + nonzero_halves[0] * nelt2] = i;
48367 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48368 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48371 if (d->vmode != V8SFmode
48372 && d->vmode != V4DFmode
48373 && d->vmode != V8SImode)
48375 dremap.vmode = V8SImode;
48376 dremap.nelt = 8;
48377 for (i = 0; i < 4; ++i)
48379 dremap.perm[i] = i + nonzero_halves[0] * 4;
48380 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48384 else if (d->one_operand_p)
48385 return false;
48386 else if (TARGET_AVX2
48387 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48389 /* vpunpckl* */
48390 for (i = 0; i < nelt4; ++i)
48392 remap[i] = i * 2;
48393 remap[i + nelt] = i * 2 + 1;
48394 remap[i + nelt2] = i * 2 + nelt2;
48395 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48396 dremap.perm[i * 2] = i;
48397 dremap.perm[i * 2 + 1] = i + nelt;
48398 dremap.perm[i * 2 + nelt2] = i + nelt2;
48399 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48402 else if (TARGET_AVX2
48403 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48405 /* vpunpckh* */
48406 for (i = 0; i < nelt4; ++i)
48408 remap[i + nelt4] = i * 2;
48409 remap[i + nelt + nelt4] = i * 2 + 1;
48410 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48411 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48412 dremap.perm[i * 2] = i + nelt4;
48413 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48414 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48415 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48418 else
48419 return false;
48422 /* Use the remapping array set up above to move the elements from their
48423 swizzled locations into their final destinations. */
48424 dfinal = *d;
48425 for (i = 0; i < nelt; ++i)
48427 unsigned e = remap[d->perm[i]];
48428 gcc_assert (e < nelt);
48429 /* If same_halves is true, both halves of the remapped vector are the
48430 same. Avoid cross-lane accesses if possible. */
48431 if (same_halves && i >= nelt2)
48433 gcc_assert (e < nelt2);
48434 dfinal.perm[i] = e + nelt2;
48436 else
48437 dfinal.perm[i] = e;
48439 if (!d->testing_p)
48441 dremap.target = gen_reg_rtx (dremap.vmode);
48442 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48444 dfinal.op1 = dfinal.op0;
48445 dfinal.one_operand_p = true;
48447 /* Test if the final remap can be done with a single insn. For V4SFmode or
48448 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48449 start_sequence ();
48450 ok = expand_vec_perm_1 (&dfinal);
48451 seq = get_insns ();
48452 end_sequence ();
48454 if (!ok)
48455 return false;
48457 if (d->testing_p)
48458 return true;
48460 if (dremap.vmode != dfinal.vmode)
48462 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48463 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48466 ok = expand_vec_perm_1 (&dremap);
48467 gcc_assert (ok);
48469 emit_insn (seq);
48470 return true;
48473 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48474 a single vector cross-lane permutation into vpermq followed
48475 by any of the single insn permutations. */
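/* Worked example: a V32QImode permutation whose low half reads only
   bytes 0-7 and 16-23 and whose high half reads only bytes 8-15 and
   24-31 first gathers those 64-bit quarters with vpermq { 0 2 1 3 },
   after which the remaining byte shuffle is purely intra-lane. */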
48477 static bool
48478 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48480 struct expand_vec_perm_d dremap, dfinal;
48481 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48482 unsigned contents[2];
48483 bool ok;
48485 if (!(TARGET_AVX2
48486 && (d->vmode == V32QImode || d->vmode == V16HImode)
48487 && d->one_operand_p))
48488 return false;
48490 contents[0] = 0;
48491 contents[1] = 0;
48492 for (i = 0; i < nelt2; ++i)
48494 contents[0] |= 1u << (d->perm[i] / nelt4);
48495 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48498 for (i = 0; i < 2; ++i)
48500 unsigned int cnt = 0;
48501 for (j = 0; j < 4; ++j)
48502 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48503 return false;
48506 if (d->testing_p)
48507 return true;
48509 dremap = *d;
48510 dremap.vmode = V4DImode;
48511 dremap.nelt = 4;
48512 dremap.target = gen_reg_rtx (V4DImode);
48513 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48514 dremap.op1 = dremap.op0;
48515 dremap.one_operand_p = true;
48516 for (i = 0; i < 2; ++i)
48518 unsigned int cnt = 0;
48519 for (j = 0; j < 4; ++j)
48520 if ((contents[i] & (1u << j)) != 0)
48521 dremap.perm[2 * i + cnt++] = j;
48522 for (; cnt < 2; ++cnt)
48523 dremap.perm[2 * i + cnt] = 0;
48526 dfinal = *d;
48527 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48528 dfinal.op1 = dfinal.op0;
48529 dfinal.one_operand_p = true;
48530 for (i = 0, j = 0; i < nelt; ++i)
48532 if (i == nelt2)
48533 j = 2;
48534 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48535 if ((d->perm[i] / nelt4) == dremap.perm[j])
48537 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48538 dfinal.perm[i] |= nelt4;
48539 else
48540 gcc_unreachable ();
48543 ok = expand_vec_perm_1 (&dremap);
48544 gcc_assert (ok);
48546 ok = expand_vec_perm_1 (&dfinal);
48547 gcc_assert (ok);
48549 return true;
48552 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
48553 a vector permutation using two instructions, vperm2f128 resp.
48554 vperm2i128 followed by any single in-lane permutation. */
48556 static bool
48557 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48559 struct expand_vec_perm_d dfirst, dsecond;
48560 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48561 bool ok;
48563 if (!TARGET_AVX
48564 || GET_MODE_SIZE (d->vmode) != 32
48565 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48566 return false;
48568 dsecond = *d;
48569 dsecond.one_operand_p = false;
48570 dsecond.testing_p = true;
48572 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48573 immediate. For perm < 16 the second permutation uses
48574 d->op0 as first operand, for perm >= 16 it uses d->op1
48575 as first operand. The second operand is the result of
48576 vperm2[fi]128. */
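/* Put differently: bits 0-1 of the loop counter select the source lane
   of the result's low lane, bits 2-3 select its high lane, and bit 4
   selects which original operand feeds the second, in-lane shuffle. */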
48577 for (perm = 0; perm < 32; perm++)
48579 /* Ignore permutations which do not move anything cross-lane. */
48580 if (perm < 16)
48582 /* The second shuffle for e.g. V4DFmode has
48583 0123 and ABCD operands.
48584 Ignore AB23, as 23 is already in the second lane
48585 of the first operand. */
48586 if ((perm & 0xc) == (1 << 2)) continue;
48587 /* And 01CD, as 01 is in the first lane of the first
48588 operand. */
48589 if ((perm & 3) == 0) continue;
48590 /* And 4567, as then the vperm2[fi]128 doesn't change
48591 anything on the original 4567 second operand. */
48592 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48594 else
48596 /* The second shuffle for e.g. V4DFmode has
48597 4567 and ABCD operands.
48598 Ignore AB67, as 67 is already in the second lane
48599 of the first operand. */
48600 if ((perm & 0xc) == (3 << 2)) continue;
48601 /* And 45CD, as 45 is in the first lane of the first
48602 operand. */
48603 if ((perm & 3) == 2) continue;
48604 /* And 0123, as then the vperm2[fi]128 doesn't change
48605 anything on the original 0123 first operand. */
48606 if ((perm & 0xf) == (1 << 2)) continue;
48609 for (i = 0; i < nelt; i++)
48611 j = d->perm[i] / nelt2;
48612 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48613 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48614 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48615 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48616 else
48617 break;
48620 if (i == nelt)
48622 start_sequence ();
48623 ok = expand_vec_perm_1 (&dsecond);
48624 end_sequence ();
48626 else
48627 ok = false;
48629 if (ok)
48631 if (d->testing_p)
48632 return true;
48634 /* Found a usable second shuffle. dfirst will be
48635 vperm2f128 on d->op0 and d->op1. */
48636 dsecond.testing_p = false;
48637 dfirst = *d;
48638 dfirst.target = gen_reg_rtx (d->vmode);
48639 for (i = 0; i < nelt; i++)
48640 dfirst.perm[i] = (i & (nelt2 - 1))
48641 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48643 canonicalize_perm (&dfirst);
48644 ok = expand_vec_perm_1 (&dfirst);
48645 gcc_assert (ok);
48647 /* And dsecond is some single insn shuffle, taking
48648 d->op0 and result of vperm2f128 (if perm < 16) or
48649 d->op1 and result of vperm2f128 (otherwise). */
48650 if (perm >= 16)
48651 dsecond.op0 = dsecond.op1;
48652 dsecond.op1 = dfirst.target;
48654 ok = expand_vec_perm_1 (&dsecond);
48655 gcc_assert (ok);
48657 return true;
48660 /* For one operand, the only useful vperm2f128 permutation is 0x01
48661 aka a lane swap. */
48662 if (d->one_operand_p)
48663 return false;
48666 return false;
48669 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48670 a two vector permutation using 2 intra-lane interleave insns
48671 and cross-lane shuffle for 32-byte vectors. */
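/* E.g. the V8SImode permutation { 4 12 5 13 6 14 7 15 } matches the
   checks below with d->perm[0] == nelt / 2 and is handled by the
   vec_interleave_highv8si expander. */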
48673 static bool
48674 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48676 unsigned i, nelt;
48677 rtx (*gen) (rtx, rtx, rtx);
48679 if (d->one_operand_p)
48680 return false;
48681 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48683 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48685 else
48686 return false;
48688 nelt = d->nelt;
48689 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48690 return false;
48691 for (i = 0; i < nelt; i += 2)
48692 if (d->perm[i] != d->perm[0] + i / 2
48693 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48694 return false;
48696 if (d->testing_p)
48697 return true;
48699 switch (d->vmode)
48701 case V32QImode:
48702 if (d->perm[0])
48703 gen = gen_vec_interleave_highv32qi;
48704 else
48705 gen = gen_vec_interleave_lowv32qi;
48706 break;
48707 case V16HImode:
48708 if (d->perm[0])
48709 gen = gen_vec_interleave_highv16hi;
48710 else
48711 gen = gen_vec_interleave_lowv16hi;
48712 break;
48713 case V8SImode:
48714 if (d->perm[0])
48715 gen = gen_vec_interleave_highv8si;
48716 else
48717 gen = gen_vec_interleave_lowv8si;
48718 break;
48719 case V4DImode:
48720 if (d->perm[0])
48721 gen = gen_vec_interleave_highv4di;
48722 else
48723 gen = gen_vec_interleave_lowv4di;
48724 break;
48725 case V8SFmode:
48726 if (d->perm[0])
48727 gen = gen_vec_interleave_highv8sf;
48728 else
48729 gen = gen_vec_interleave_lowv8sf;
48730 break;
48731 case V4DFmode:
48732 if (d->perm[0])
48733 gen = gen_vec_interleave_highv4df;
48734 else
48735 gen = gen_vec_interleave_lowv4df;
48736 break;
48737 default:
48738 gcc_unreachable ();
48741 emit_insn (gen (d->target, d->op0, d->op1));
48742 return true;
48745 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
48746 a single vector permutation using a single intra-lane vector
48747 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48748 the non-swapped and swapped vectors together. */
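/* Worked example: the one-operand V4DFmode permutation { 0 3 2 1 }
   needs no intra-lane shuffle at all (dfirst is the identity);
   vperm2f128 swaps the lanes and vblendpd with mask 0b1010 picks
   elements 3 and 1 from the swapped copy. */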
48750 static bool
48751 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48753 struct expand_vec_perm_d dfirst, dsecond;
48754 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48755 rtx_insn *seq;
48756 bool ok;
48757 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48759 if (!TARGET_AVX
48760 || TARGET_AVX2
48761 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48762 || !d->one_operand_p)
48763 return false;
48765 dfirst = *d;
48766 for (i = 0; i < nelt; i++)
48767 dfirst.perm[i] = 0xff;
48768 for (i = 0, msk = 0; i < nelt; i++)
48770 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48771 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48772 return false;
48773 dfirst.perm[j] = d->perm[i];
48774 if (j != i)
48775 msk |= (1 << i);
48777 for (i = 0; i < nelt; i++)
48778 if (dfirst.perm[i] == 0xff)
48779 dfirst.perm[i] = i;
48781 if (!d->testing_p)
48782 dfirst.target = gen_reg_rtx (dfirst.vmode);
48784 start_sequence ();
48785 ok = expand_vec_perm_1 (&dfirst);
48786 seq = get_insns ();
48787 end_sequence ();
48789 if (!ok)
48790 return false;
48792 if (d->testing_p)
48793 return true;
48795 emit_insn (seq);
48797 dsecond = *d;
48798 dsecond.op0 = dfirst.target;
48799 dsecond.op1 = dfirst.target;
48800 dsecond.one_operand_p = true;
48801 dsecond.target = gen_reg_rtx (dsecond.vmode);
48802 for (i = 0; i < nelt; i++)
48803 dsecond.perm[i] = i ^ nelt2;
48805 ok = expand_vec_perm_1 (&dsecond);
48806 gcc_assert (ok);
48808 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48809 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48810 return true;
48813 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
48814 permutation using two vperm2f128, followed by a vshufpd insn blending
48815 the two vectors together. */
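/* Worked example: for perm { 2 4 1 7 }, dfirst gathers { 2 3 0 1 },
   dsecond gathers { 4 5 6 7 }, and the final vshufpd selects
   { 0 4 3 7 } from those two temporaries, i.e. { 2 4 1 7 } overall. */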
48817 static bool
48818 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48820 struct expand_vec_perm_d dfirst, dsecond, dthird;
48821 bool ok;
48823 if (!TARGET_AVX || (d->vmode != V4DFmode))
48824 return false;
48826 if (d->testing_p)
48827 return true;
48829 dfirst = *d;
48830 dsecond = *d;
48831 dthird = *d;
48833 dfirst.perm[0] = (d->perm[0] & ~1);
48834 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48835 dfirst.perm[2] = (d->perm[2] & ~1);
48836 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48837 dsecond.perm[0] = (d->perm[1] & ~1);
48838 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48839 dsecond.perm[2] = (d->perm[3] & ~1);
48840 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48841 dthird.perm[0] = (d->perm[0] % 2);
48842 dthird.perm[1] = (d->perm[1] % 2) + 4;
48843 dthird.perm[2] = (d->perm[2] % 2) + 2;
48844 dthird.perm[3] = (d->perm[3] % 2) + 6;
48846 dfirst.target = gen_reg_rtx (dfirst.vmode);
48847 dsecond.target = gen_reg_rtx (dsecond.vmode);
48848 dthird.op0 = dfirst.target;
48849 dthird.op1 = dsecond.target;
48850 dthird.one_operand_p = false;
48852 canonicalize_perm (&dfirst);
48853 canonicalize_perm (&dsecond);
48855 ok = expand_vec_perm_1 (&dfirst)
48856 && expand_vec_perm_1 (&dsecond)
48857 && expand_vec_perm_1 (&dthird);
48859 gcc_assert (ok);
48861 return true;
48864 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48865 permutation with two pshufb insns and an ior. We should have already
48866 failed all two instruction sequences. */
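/* Worked example: for a V16QImode extract-even permutation
   { 0 2 4 ... 30 }, the first mask is { 0 2 4 ... 14, -128 x 8 }
   applied to op0 and the second is { -128 x 8, 0 2 4 ... 14 } applied
   to op1; the por of the two pshufb results is the desired vector. */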
48868 static bool
48869 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48871 rtx rperm[2][16], vperm, l, h, op, m128;
48872 unsigned int i, nelt, eltsz;
48874 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48875 return false;
48876 gcc_assert (!d->one_operand_p);
48878 if (d->testing_p)
48879 return true;
48881 nelt = d->nelt;
48882 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48884 /* Generate two permutation masks. If the required element is within
48885 the given vector it is shuffled into the proper lane. If the required
48886 element is in the other vector, force a zero into the lane by setting
48887 bit 7 in the permutation mask. */
48888 m128 = GEN_INT (-128);
48889 for (i = 0; i < nelt; ++i)
48891 unsigned j, e = d->perm[i];
48892 unsigned which = (e >= nelt);
48893 if (e >= nelt)
48894 e -= nelt;
48896 for (j = 0; j < eltsz; ++j)
48898 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48899 rperm[1-which][i*eltsz + j] = m128;
48903 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48904 vperm = force_reg (V16QImode, vperm);
48906 l = gen_reg_rtx (V16QImode);
48907 op = gen_lowpart (V16QImode, d->op0);
48908 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48910 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48911 vperm = force_reg (V16QImode, vperm);
48913 h = gen_reg_rtx (V16QImode);
48914 op = gen_lowpart (V16QImode, d->op1);
48915 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48917 op = d->target;
48918 if (d->vmode != V16QImode)
48919 op = gen_reg_rtx (V16QImode);
48920 emit_insn (gen_iorv16qi3 (op, l, h));
48921 if (op != d->target)
48922 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48924 return true;
48927 /* Implement arbitrary permutation of a single V32QImode or V16QImode operand
48928 with two vpshufb insns, vpermq and vpor. We should have already failed
48929 all two or three instruction sequences. */
48931 static bool
48932 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48934 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48935 unsigned int i, nelt, eltsz;
48937 if (!TARGET_AVX2
48938 || !d->one_operand_p
48939 || (d->vmode != V32QImode && d->vmode != V16HImode))
48940 return false;
48942 if (d->testing_p)
48943 return true;
48945 nelt = d->nelt;
48946 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48948 /* Generate two permutation masks. If the required element is within
48949 the same lane, it is shuffled in. If the required element is from the
48950 other lane, force a zero by setting bit 7 in the permutation mask.
48951 The other mask has a non-negative element wherever an element is
48952 requested from the other lane, but that element is also moved to the
48953 other lane, so that the result of vpshufb can have its two V2TImode
48954 halves swapped. */
48955 m128 = GEN_INT (-128);
48956 for (i = 0; i < nelt; ++i)
48958 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48959 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48961 for (j = 0; j < eltsz; ++j)
48963 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48964 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48968 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48969 vperm = force_reg (V32QImode, vperm);
48971 h = gen_reg_rtx (V32QImode);
48972 op = gen_lowpart (V32QImode, d->op0);
48973 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48975 /* Swap the 128-bit lanes of h into hp. */
48976 hp = gen_reg_rtx (V4DImode);
48977 op = gen_lowpart (V4DImode, h);
48978 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48979 const1_rtx));
48981 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48982 vperm = force_reg (V32QImode, vperm);
48984 l = gen_reg_rtx (V32QImode);
48985 op = gen_lowpart (V32QImode, d->op0);
48986 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48988 op = d->target;
48989 if (d->vmode != V32QImode)
48990 op = gen_reg_rtx (V32QImode);
48991 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48992 if (op != d->target)
48993 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48995 return true;
48998 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48999 and extract-odd permutations of two V32QImode or V16QImode operands
49000 with two vpshufb insns, vpor and vpermq. We should have already
49001 failed all two or three instruction sequences. */
49003 static bool
49004 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
49006 rtx rperm[2][32], vperm, l, h, ior, op, m128;
49007 unsigned int i, nelt, eltsz;
49009 if (!TARGET_AVX2
49010 || d->one_operand_p
49011 || (d->vmode != V32QImode && d->vmode != V16HImode))
49012 return false;
49014 for (i = 0; i < d->nelt; ++i)
49015 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
49016 return false;
49018 if (d->testing_p)
49019 return true;
49021 nelt = d->nelt;
49022 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
49024 /* Generate two permutation masks. In the first permutation mask
49025 the first quarter will contain indexes for the first half
49026 of the op0, the second quarter will contain bit 7 set, third quarter
49027 will contain indexes for the second half of the op0 and the
49028 last quarter bit 7 set. In the second permutation mask
49029 the first quarter will contain bit 7 set, the second quarter
49030 indexes for the first half of the op1, the third quarter bit 7 set
49031 and last quarter indexes for the second half of the op1.
49032 I.e. the first mask e.g. for V32QImode extract even will be:
49033 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
49034 (all values masked with 0xf except for -128) and second mask
49035 for extract even will be
49036 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
49037 m128 = GEN_INT (-128);
49038 for (i = 0; i < nelt; ++i)
49040 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49041 unsigned which = d->perm[i] >= nelt;
49042 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
49044 for (j = 0; j < eltsz; ++j)
49046 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
49047 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
49051 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
49052 vperm = force_reg (V32QImode, vperm);
49054 l = gen_reg_rtx (V32QImode);
49055 op = gen_lowpart (V32QImode, d->op0);
49056 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
49058 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
49059 vperm = force_reg (V32QImode, vperm);
49061 h = gen_reg_rtx (V32QImode);
49062 op = gen_lowpart (V32QImode, d->op1);
49063 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
49065 ior = gen_reg_rtx (V32QImode);
49066 emit_insn (gen_iorv32qi3 (ior, l, h));
49068 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
49069 op = gen_reg_rtx (V4DImode);
49070 ior = gen_lowpart (V4DImode, ior);
49071 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
49072 const1_rtx, GEN_INT (3)));
49073 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49075 return true;
49078 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
49079 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
49080 with two "and" and "pack" or two "shift" and "pack" insns. We should
49081 have already failed all two instruction sequences. */
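/* The idea: for extract-even the wanted bytes or words already sit in
   the low half of each wider element, so masking with e.g. 0x00ff per
   word and packing with packuswb keeps exactly the even elements; for
   extract-odd a logical shift right by the element width replaces the
   mask. */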
49083 static bool
49084 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
49086 rtx op, dop0, dop1, t, rperm[16];
49087 unsigned i, odd, c, s, nelt = d->nelt;
49088 bool end_perm = false;
49089 machine_mode half_mode;
49090 rtx (*gen_and) (rtx, rtx, rtx);
49091 rtx (*gen_pack) (rtx, rtx, rtx);
49092 rtx (*gen_shift) (rtx, rtx, rtx);
49094 if (d->one_operand_p)
49095 return false;
49097 switch (d->vmode)
49099 case V8HImode:
49100 /* Required for "pack". */
49101 if (!TARGET_SSE4_1)
49102 return false;
49103 c = 0xffff;
49104 s = 16;
49105 half_mode = V4SImode;
49106 gen_and = gen_andv4si3;
49107 gen_pack = gen_sse4_1_packusdw;
49108 gen_shift = gen_lshrv4si3;
49109 break;
49110 case V16QImode:
49111 /* No check as all instructions are SSE2. */
49112 c = 0xff;
49113 s = 8;
49114 half_mode = V8HImode;
49115 gen_and = gen_andv8hi3;
49116 gen_pack = gen_sse2_packuswb;
49117 gen_shift = gen_lshrv8hi3;
49118 break;
49119 case V16HImode:
49120 if (!TARGET_AVX2)
49121 return false;
49122 c = 0xffff;
49123 s = 16;
49124 half_mode = V8SImode;
49125 gen_and = gen_andv8si3;
49126 gen_pack = gen_avx2_packusdw;
49127 gen_shift = gen_lshrv8si3;
49128 end_perm = true;
49129 break;
49130 case V32QImode:
49131 if (!TARGET_AVX2)
49132 return false;
49133 c = 0xff;
49134 s = 8;
49135 half_mode = V16HImode;
49136 gen_and = gen_andv16hi3;
49137 gen_pack = gen_avx2_packuswb;
49138 gen_shift = gen_lshrv16hi3;
49139 end_perm = true;
49140 break;
49141 default:
49142 /* Only for V8HI, V16QI, V16HI and V32QI modes is this more
49143 profitable than general shuffles. */
49144 return false;
49147 /* Check that the permutation is extract-even or extract-odd. */
49148 odd = d->perm[0];
49149 if (odd > 1)
49150 return false;
49152 for (i = 1; i < nelt; ++i)
49153 if (d->perm[i] != 2 * i + odd)
49154 return false;
49156 if (d->testing_p)
49157 return true;
49159 dop0 = gen_reg_rtx (half_mode);
49160 dop1 = gen_reg_rtx (half_mode);
49161 if (odd == 0)
49163 for (i = 0; i < nelt / 2; i++)
49164 rperm[i] = GEN_INT (c);
49165 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
49166 t = force_reg (half_mode, t);
49167 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
49168 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
49170 else
49172 emit_insn (gen_shift (dop0,
49173 gen_lowpart (half_mode, d->op0),
49174 GEN_INT (s)));
49175 emit_insn (gen_shift (dop1,
49176 gen_lowpart (half_mode, d->op1),
49177 GEN_INT (s)));
49179 /* For the 256-bit case on AVX2 we need to permute the pack result. */
49180 if (TARGET_AVX2 && end_perm)
49182 op = gen_reg_rtx (d->vmode);
49183 t = gen_reg_rtx (V4DImode);
49184 emit_insn (gen_pack (op, dop0, dop1));
49185 emit_insn (gen_avx2_permv4di_1 (t,
49186 gen_lowpart (V4DImode, op),
49187 const0_rtx,
49188 const2_rtx,
49189 const1_rtx,
49190 GEN_INT (3)));
49191 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
49193 else
49194 emit_insn (gen_pack (d->target, dop0, dop1));
49196 return true;
49199 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
49200 and extract-odd permutations. */
49202 static bool
49203 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
49205 rtx t1, t2, t3, t4, t5;
49207 switch (d->vmode)
49209 case V4DFmode:
49210 if (d->testing_p)
49211 break;
49212 t1 = gen_reg_rtx (V4DFmode);
49213 t2 = gen_reg_rtx (V4DFmode);
49215 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49216 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
49217 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
49219 /* Now an unpck[lh]pd will produce the result required. */
49220 if (odd)
49221 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
49222 else
49223 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
49224 emit_insn (t3);
49225 break;
49227 case V8SFmode:
49229 int mask = odd ? 0xdd : 0x88;
49231 if (d->testing_p)
49232 break;
49233 t1 = gen_reg_rtx (V8SFmode);
49234 t2 = gen_reg_rtx (V8SFmode);
49235 t3 = gen_reg_rtx (V8SFmode);
49237 /* Shuffle within the 128-bit lanes to produce:
49238 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
49239 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
49240 GEN_INT (mask)));
49242 /* Shuffle the lanes around to produce:
49243 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
49244 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
49245 GEN_INT (0x3)));
49247 /* Shuffle within the 128-bit lanes to produce:
49248 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
49249 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
49251 /* Shuffle within the 128-bit lanes to produce:
49252 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
49253 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
49255 /* Shuffle the lanes around to produce:
49256 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
49257 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
49258 GEN_INT (0x20)));
49260 break;
49262 case V2DFmode:
49263 case V4SFmode:
49264 case V2DImode:
49265 case V4SImode:
49266 /* These are always directly implementable by expand_vec_perm_1. */
49267 gcc_unreachable ();
49269 case V8HImode:
49270 if (TARGET_SSE4_1)
49271 return expand_vec_perm_even_odd_pack (d);
49272 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
49273 return expand_vec_perm_pshufb2 (d);
49274 else
49276 if (d->testing_p)
49277 break;
49278 /* We need 2*log2(N)-1 operations to achieve odd/even
49279 with interleave. */
49280 t1 = gen_reg_rtx (V8HImode);
49281 t2 = gen_reg_rtx (V8HImode);
49282 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
49283 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
49284 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
49285 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
49286 if (odd)
49287 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
49288 else
49289 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
49290 emit_insn (t3);
49292 break;
49294 case V16QImode:
49295 return expand_vec_perm_even_odd_pack (d);
49297 case V16HImode:
49298 case V32QImode:
49299 return expand_vec_perm_even_odd_pack (d);
49301 case V4DImode:
49302 if (!TARGET_AVX2)
49304 struct expand_vec_perm_d d_copy = *d;
49305 d_copy.vmode = V4DFmode;
49306 if (d->testing_p)
49307 d_copy.target = gen_lowpart (V4DFmode, d->target);
49308 else
49309 d_copy.target = gen_reg_rtx (V4DFmode);
49310 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
49311 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
49312 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49314 if (!d->testing_p)
49315 emit_move_insn (d->target,
49316 gen_lowpart (V4DImode, d_copy.target));
49317 return true;
49319 return false;
49322 if (d->testing_p)
49323 break;
49325 t1 = gen_reg_rtx (V4DImode);
49326 t2 = gen_reg_rtx (V4DImode);
49328 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49329 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
49330 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
49332 /* Now a vpunpck[lh]qdq will produce the required result. */
49333 if (odd)
49334 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
49335 else
49336 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
49337 emit_insn (t3);
49338 break;
49340 case V8SImode:
49341 if (!TARGET_AVX2)
49343 struct expand_vec_perm_d d_copy = *d;
49344 d_copy.vmode = V8SFmode;
49345 if (d->testing_p)
49346 d_copy.target = gen_lowpart (V8SFmode, d->target);
49347 else
49348 d_copy.target = gen_reg_rtx (V8SFmode);
49349 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
49350 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
49351 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49353 if (!d->testing_p)
49354 emit_move_insn (d->target,
49355 gen_lowpart (V8SImode, d_copy.target));
49356 return true;
49358 return false;
49361 if (d->testing_p)
49362 break;
49364 t1 = gen_reg_rtx (V8SImode);
49365 t2 = gen_reg_rtx (V8SImode);
49366 t3 = gen_reg_rtx (V4DImode);
49367 t4 = gen_reg_rtx (V4DImode);
49368 t5 = gen_reg_rtx (V4DImode);
49370 /* Shuffle the lanes around into
49371 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
49372 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49373 gen_lowpart (V4DImode, d->op1),
49374 GEN_INT (0x20)));
49375 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49376 gen_lowpart (V4DImode, d->op1),
49377 GEN_INT (0x31)));
49379 /* Swap the 2nd and 3rd position in each lane into
49380 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49381 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49382 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49383 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49384 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49386 /* Now a vpunpck[lh]qdq will produce
49387 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
49388 if (odd)
49389 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49390 gen_lowpart (V4DImode, t2));
49391 else
49392 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49393 gen_lowpart (V4DImode, t2));
49394 emit_insn (t3);
49395 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49396 break;
49398 default:
49399 gcc_unreachable ();
49402 return true;
49405 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
49406 extract-even and extract-odd permutations. */
49408 static bool
49409 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49411 unsigned i, odd, nelt = d->nelt;
49413 odd = d->perm[0];
49414 if (odd != 0 && odd != 1)
49415 return false;
49417 for (i = 1; i < nelt; ++i)
49418 if (d->perm[i] != 2 * i + odd)
49419 return false;
49421 return expand_vec_perm_even_odd_1 (d, odd);
49424 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
49425 permutations. We assume that expand_vec_perm_1 has already failed. */
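/* Worked example: broadcasting byte 5 of a V16QImode vector uses
   punpcklbw to double bytes into words, then punpckhwd (the duplicated
   pair now sits in the high half) to reach V4SImode, and finally a
   pshufd of dword 1 replicates it across the whole vector. */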
49427 static bool
49428 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49430 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49431 machine_mode vmode = d->vmode;
49432 unsigned char perm2[4];
49433 rtx op0 = d->op0, dest;
49434 bool ok;
49436 switch (vmode)
49438 case V4DFmode:
49439 case V8SFmode:
49440 /* These are special-cased in sse.md so that we can optionally
49441 use the vbroadcast instruction. They expand to two insns
49442 if the input happens to be in a register. */
49443 gcc_unreachable ();
49445 case V2DFmode:
49446 case V2DImode:
49447 case V4SFmode:
49448 case V4SImode:
49449 /* These are always implementable using standard shuffle patterns. */
49450 gcc_unreachable ();
49452 case V8HImode:
49453 case V16QImode:
49454 /* These can be implemented via interleave. We save one insn by
49455 stopping once we have promoted to V4SImode and then use pshufd. */
49456 if (d->testing_p)
49457 return true;
49460 rtx dest;
49461 rtx (*gen) (rtx, rtx, rtx)
49462 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49463 : gen_vec_interleave_lowv8hi;
49465 if (elt >= nelt2)
49467 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49468 : gen_vec_interleave_highv8hi;
49469 elt -= nelt2;
49471 nelt2 /= 2;
49473 dest = gen_reg_rtx (vmode);
49474 emit_insn (gen (dest, op0, op0));
49475 vmode = get_mode_wider_vector (vmode);
49476 op0 = gen_lowpart (vmode, dest);
49478 while (vmode != V4SImode);
49480 memset (perm2, elt, 4);
49481 dest = gen_reg_rtx (V4SImode);
49482 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49483 gcc_assert (ok);
49484 if (!d->testing_p)
49485 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49486 return true;
49488 case V64QImode:
49489 case V32QImode:
49490 case V16HImode:
49491 case V8SImode:
49492 case V4DImode:
49493 /* For AVX2 broadcasts of the first element vpbroadcast* or
49494 vpermq should be used by expand_vec_perm_1. */
49495 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49496 return false;
49498 default:
49499 gcc_unreachable ();
49503 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
49504 broadcast permutations. */
49506 static bool
49507 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49509 unsigned i, elt, nelt = d->nelt;
49511 if (!d->one_operand_p)
49512 return false;
49514 elt = d->perm[0];
49515 for (i = 1; i < nelt; ++i)
49516 if (d->perm[i] != elt)
49517 return false;
49519 return expand_vec_perm_broadcast_1 (d);
49522 /* Implement arbitrary permutations of two V64QImode operands
49523 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49524 static bool
49525 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49527 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49528 return false;
49530 if (d->testing_p)
49531 return true;
49533 struct expand_vec_perm_d ds[2];
49534 rtx rperm[128], vperm, target0, target1;
49535 unsigned int i, nelt;
49536 machine_mode vmode;
49538 nelt = d->nelt;
49539 vmode = V64QImode;
49541 for (i = 0; i < 2; i++)
49543 ds[i] = *d;
49544 ds[i].vmode = V32HImode;
49545 ds[i].nelt = 32;
49546 ds[i].target = gen_reg_rtx (V32HImode);
49547 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49548 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49551 /* Prepare permutations such that the first one takes care of
49552 putting the even bytes into the right positions or one position
49553 higher (ds[0]) and the second one takes care of
49554 putting the odd bytes into the right positions or one position
49555 lower (ds[1]). */
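/* E.g. if d->perm[0] == 33, then ds[0].perm[0] == 16, so the first
   vpermi2w moves word 16 of the concatenated inputs (bytes 32-33) into
   word 0, and rperm[0] == 1 makes the following vpshufb pick its odd
   byte. */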
49557 for (i = 0; i < nelt; i++)
49559 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49560 if (i & 1)
49562 rperm[i] = constm1_rtx;
49563 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49565 else
49567 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49568 rperm[i + 64] = constm1_rtx;
49572 bool ok = expand_vec_perm_1 (&ds[0]);
49573 gcc_assert (ok);
49574 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49576 ok = expand_vec_perm_1 (&ds[1]);
49577 gcc_assert (ok);
49578 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49580 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49581 vperm = force_reg (vmode, vperm);
49582 target0 = gen_reg_rtx (V64QImode);
49583 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49585 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49586 vperm = force_reg (vmode, vperm);
49587 target1 = gen_reg_rtx (V64QImode);
49588 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49590 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49591 return true;
49594 /* Implement arbitrary permutation of two V32QImode or V16QImode operands
49595 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49596 all the shorter instruction sequences. */
49598 static bool
49599 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49601 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49602 unsigned int i, nelt, eltsz;
49603 bool used[4];
49605 if (!TARGET_AVX2
49606 || d->one_operand_p
49607 || (d->vmode != V32QImode && d->vmode != V16HImode))
49608 return false;
49610 if (d->testing_p)
49611 return true;
49613 nelt = d->nelt;
49614 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
49616 /* Generate 4 permutation masks. If the required element is within
49617 the same lane, it is shuffled in. If the required element is from the
49618 other lane, force a zero by setting bit 7 in the permutation mask.
49619 The other masks have a non-negative element wherever an element is
49620 requested from the other lane, but that element is also moved to the
49621 other lane, so that the result of vpshufb can have its two V2TImode
49622 halves swapped. */
49623 m128 = GEN_INT (-128);
49624 for (i = 0; i < 32; ++i)
49626 rperm[0][i] = m128;
49627 rperm[1][i] = m128;
49628 rperm[2][i] = m128;
49629 rperm[3][i] = m128;
49631 used[0] = false;
49632 used[1] = false;
49633 used[2] = false;
49634 used[3] = false;
49635 for (i = 0; i < nelt; ++i)
49637 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49638 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49639 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49641 for (j = 0; j < eltsz; ++j)
49642 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49643 used[which] = true;
49646 for (i = 0; i < 2; ++i)
49648 if (!used[2 * i + 1])
49650 h[i] = NULL_RTX;
49651 continue;
49653 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49654 gen_rtvec_v (32, rperm[2 * i + 1]));
49655 vperm = force_reg (V32QImode, vperm);
49656 h[i] = gen_reg_rtx (V32QImode);
49657 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49658 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49661 /* Swap the 128-bit lanes of h[X]. */
49662 for (i = 0; i < 2; ++i)
49664 if (h[i] == NULL_RTX)
49665 continue;
49666 op = gen_reg_rtx (V4DImode);
49667 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49668 const2_rtx, GEN_INT (3), const0_rtx,
49669 const1_rtx));
49670 h[i] = gen_lowpart (V32QImode, op);
49673 for (i = 0; i < 2; ++i)
49675 if (!used[2 * i])
49677 l[i] = NULL_RTX;
49678 continue;
49680 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49681 vperm = force_reg (V32QImode, vperm);
49682 l[i] = gen_reg_rtx (V32QImode);
49683 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49684 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49687 for (i = 0; i < 2; ++i)
49689 if (h[i] && l[i])
49691 op = gen_reg_rtx (V32QImode);
49692 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49693 l[i] = op;
49695 else if (h[i])
49696 l[i] = h[i];
49699 gcc_assert (l[0] && l[1]);
49700 op = d->target;
49701 if (d->vmode != V32QImode)
49702 op = gen_reg_rtx (V32QImode);
49703 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49704 if (op != d->target)
49705 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49706 return true;
49709 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49710 With all of the interface bits taken care of, perform the expansion
49711 in D and return true on success. */
49713 static bool
49714 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49716 /* Try a single instruction expansion. */
49717 if (expand_vec_perm_1 (d))
49718 return true;
49720 /* Try sequences of two instructions. */
49722 if (expand_vec_perm_pshuflw_pshufhw (d))
49723 return true;
49725 if (expand_vec_perm_palignr (d, false))
49726 return true;
49728 if (expand_vec_perm_interleave2 (d))
49729 return true;
49731 if (expand_vec_perm_broadcast (d))
49732 return true;
49734 if (expand_vec_perm_vpermq_perm_1 (d))
49735 return true;
49737 if (expand_vec_perm_vperm2f128 (d))
49738 return true;
49740 if (expand_vec_perm_pblendv (d))
49741 return true;
49743 /* Try sequences of three instructions. */
49745 if (expand_vec_perm_even_odd_pack (d))
49746 return true;
49748 if (expand_vec_perm_2vperm2f128_vshuf (d))
49749 return true;
49751 if (expand_vec_perm_pshufb2 (d))
49752 return true;
49754 if (expand_vec_perm_interleave3 (d))
49755 return true;
49757 if (expand_vec_perm_vperm2f128_vblend (d))
49758 return true;
49760 /* Try sequences of four instructions. */
49762 if (expand_vec_perm_vpshufb2_vpermq (d))
49763 return true;
49765 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49766 return true;
49768 if (expand_vec_perm_vpermi2_vpshub2 (d))
49769 return true;
49771 /* ??? Look for narrow permutations whose element orderings would
49772 allow the promotion to a wider mode. */
49774 /* ??? Look for sequences of interleave or a wider permute that place
49775 the data into the correct lanes for a half-vector shuffle like
49776 pshuf[lh]w or vpermilps. */
49778 /* ??? Look for sequences of interleave that produce the desired results.
49779 The combinatorics of punpck[lh] get pretty ugly... */
49781 if (expand_vec_perm_even_odd (d))
49782 return true;
49784 /* Even longer sequences. */
49785 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49786 return true;
49788 return false;
49791 /* If a permutation only uses one operand, make it clear. Returns true
49792 if the permutation references both operands. */
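/* E.g. for nelt == 4, perm { 5 6 4 7 } references only the second
   operand, so it is folded to { 1 2 0 3 } on op1 (case 2 below). */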
49794 static bool
49795 canonicalize_perm (struct expand_vec_perm_d *d)
49797 int i, which, nelt = d->nelt;
49799 for (i = which = 0; i < nelt; ++i)
49800 which |= (d->perm[i] < nelt ? 1 : 2);
49802 d->one_operand_p = true;
49803 switch (which)
49805 default:
49806 gcc_unreachable();
49808 case 3:
49809 if (!rtx_equal_p (d->op0, d->op1))
49811 d->one_operand_p = false;
49812 break;
49814 /* The elements of PERM do not suggest that only the first operand
49815 is used, but both operands are identical. Allow easier matching
49816 of the permutation by folding the permutation into the single
49817 input vector. */
49818 /* FALLTHRU */
49820 case 2:
49821 for (i = 0; i < nelt; ++i)
49822 d->perm[i] &= nelt - 1;
49823 d->op0 = d->op1;
49824 break;
49826 case 1:
49827 d->op1 = d->op0;
49828 break;
49831 return (which == 3);
49834 bool
49835 ix86_expand_vec_perm_const (rtx operands[4])
49837 struct expand_vec_perm_d d;
49838 unsigned char perm[MAX_VECT_LEN];
49839 int i, nelt;
49840 bool two_args;
49841 rtx sel;
49843 d.target = operands[0];
49844 d.op0 = operands[1];
49845 d.op1 = operands[2];
49846 sel = operands[3];
49848 d.vmode = GET_MODE (d.target);
49849 gcc_assert (VECTOR_MODE_P (d.vmode));
49850 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49851 d.testing_p = false;
49853 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49854 gcc_assert (XVECLEN (sel, 0) == nelt);
49855 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49857 for (i = 0; i < nelt; ++i)
49859 rtx e = XVECEXP (sel, 0, i);
49860 int ei = INTVAL (e) & (2 * nelt - 1);
49861 d.perm[i] = ei;
49862 perm[i] = ei;
49865 two_args = canonicalize_perm (&d);
49867 if (ix86_expand_vec_perm_const_1 (&d))
49868 return true;
49870 /* If the selector says both arguments are needed, but the operands are the
49871 same, the above tried to expand with one_operand_p and flattened selector.
49872 If that didn't work, retry without one_operand_p; we succeeded with that
49873 during testing. */
49874 if (two_args && d.one_operand_p)
49876 d.one_operand_p = false;
49877 memcpy (d.perm, perm, sizeof (perm));
49878 return ix86_expand_vec_perm_const_1 (&d);
49881 return false;
49884 /* Implement targetm.vectorize.vec_perm_const_ok. */
49886 static bool
49887 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49888 const unsigned char *sel)
49890 struct expand_vec_perm_d d;
49891 unsigned int i, nelt, which;
49892 bool ret;
49894 d.vmode = vmode;
49895 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49896 d.testing_p = true;
49898 /* Given sufficient ISA support we can just return true here
49899 for selected vector modes. */
49900 switch (d.vmode)
49902 case V16SFmode:
49903 case V16SImode:
49904 case V8DImode:
49905 case V8DFmode:
49906 if (TARGET_AVX512F)
49907 /* All implementable with a single vpermi2 insn. */
49908 return true;
49909 break;
49910 case V32HImode:
49911 if (TARGET_AVX512BW)
49912 /* All implementable with a single vpermi2 insn. */
49913 return true;
49914 break;
49915 case V64QImode:
49916 if (TARGET_AVX512BW)
49917 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49918 return true;
49919 break;
49920 case V8SImode:
49921 case V8SFmode:
49922 case V4DFmode:
49923 case V4DImode:
49924 if (TARGET_AVX512VL)
49925 /* All implementable with a single vpermi2 insn. */
49926 return true;
49927 break;
49928 case V16HImode:
49929 if (TARGET_AVX2)
49930 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49931 return true;
49932 break;
49933 case V32QImode:
49934 if (TARGET_AVX2)
49935 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49936 return true;
49937 break;
49938 case V4SImode:
49939 case V4SFmode:
49940 case V8HImode:
49941 case V16QImode:
49942 /* All implementable with a single vpperm insn. */
49943 if (TARGET_XOP)
49944 return true;
49945 /* All implementable with 2 pshufb + 1 ior. */
49946 if (TARGET_SSSE3)
49947 return true;
49948 break;
49949 case V2DImode:
49950 case V2DFmode:
49951 /* All implementable with shufpd or unpck[lh]pd. */
49952 return true;
49953 default:
49954 return false;
49957 /* Extract the values from SEL into the permutation
49958 array in D. */
49959 memcpy (d.perm, sel, nelt);
49960 for (i = which = 0; i < nelt; ++i)
49962 unsigned char e = d.perm[i];
49963 gcc_assert (e < 2 * nelt);
49964 which |= (e < nelt ? 1 : 2);
49967 /* If all elements are from the second vector, fold them onto the first. */
49968 if (which == 2)
49969 for (i = 0; i < nelt; ++i)
49970 d.perm[i] -= nelt;
49972 /* Check whether the mask can be applied to the vector type. */
49973 d.one_operand_p = (which != 3);
49975 /* Implementable with shufps or pshufd. */
49976 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49977 return true;
49979 /* Otherwise we have to go through the motions and see if we can
49980 figure out how to generate the requested permutation. */
49981 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49982 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49983 if (!d.one_operand_p)
49984 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49986 start_sequence ();
49987 ret = ix86_expand_vec_perm_const_1 (&d);
49988 end_sequence ();
49990 return ret;
49993 void
49994 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49996 struct expand_vec_perm_d d;
49997 unsigned i, nelt;
49999 d.target = targ;
50000 d.op0 = op0;
50001 d.op1 = op1;
50002 d.vmode = GET_MODE (targ);
50003 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
50004 d.one_operand_p = false;
50005 d.testing_p = false;
50007 for (i = 0; i < nelt; ++i)
50008 d.perm[i] = i * 2 + odd;
50010 /* We'll either be able to implement the permutation directly... */
50011 if (expand_vec_perm_1 (&d))
50012 return;
50014 /* ... or we use the special-case patterns. */
50015 expand_vec_perm_even_odd_1 (&d, odd);
50018 static void
50019 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
50021 struct expand_vec_perm_d d;
50022 unsigned i, nelt, base;
50023 bool ok;
50025 d.target = targ;
50026 d.op0 = op0;
50027 d.op1 = op1;
50028 d.vmode = GET_MODE (targ);
50029 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
50030 d.one_operand_p = false;
50031 d.testing_p = false;
50033 base = high_p ? nelt / 2 : 0;
50034 for (i = 0; i < nelt / 2; ++i)
50036 d.perm[i * 2] = i + base;
50037 d.perm[i * 2 + 1] = i + base + nelt;
50040 /* Note that for AVX this isn't one instruction. */
50041 ok = ix86_expand_vec_perm_const_1 (&d);
50042 gcc_assert (ok);
50046 /* Expand a vector operation CODE for a V*QImode in terms of the
50047 same operation on V*HImode. */
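/* E.g. for MULT on V16QImode: each operand is interleaved with itself
   so that every word holds a source byte in its low byte, two V8HImode
   multiplies leave the byte products in the even byte positions, and
   the final even-extraction permutation gathers them into DEST. */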
50049 void
50050 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
50052 machine_mode qimode = GET_MODE (dest);
50053 machine_mode himode;
50054 rtx (*gen_il) (rtx, rtx, rtx);
50055 rtx (*gen_ih) (rtx, rtx, rtx);
50056 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
50057 struct expand_vec_perm_d d;
50058 bool ok, full_interleave;
50059 bool uns_p = false;
50060 int i;
50062 switch (qimode)
50064 case V16QImode:
50065 himode = V8HImode;
50066 gen_il = gen_vec_interleave_lowv16qi;
50067 gen_ih = gen_vec_interleave_highv16qi;
50068 break;
50069 case V32QImode:
50070 himode = V16HImode;
50071 gen_il = gen_avx2_interleave_lowv32qi;
50072 gen_ih = gen_avx2_interleave_highv32qi;
50073 break;
50074 case V64QImode:
50075 himode = V32HImode;
50076 gen_il = gen_avx512bw_interleave_lowv64qi;
50077 gen_ih = gen_avx512bw_interleave_highv64qi;
50078 break;
50079 default:
50080 gcc_unreachable ();
50083 op2_l = op2_h = op2;
50084 switch (code)
50086 case MULT:
50087 /* Unpack data such that we've got a source byte in each low byte of
50088 each word. We don't care what goes into the high byte of each word.
50089 Rather than trying to get zero in there, most convenient is to let
50090 it be a copy of the low byte. */
50091 op2_l = gen_reg_rtx (qimode);
50092 op2_h = gen_reg_rtx (qimode);
50093 emit_insn (gen_il (op2_l, op2, op2));
50094 emit_insn (gen_ih (op2_h, op2, op2));
50095 /* FALLTHRU */
50097 op1_l = gen_reg_rtx (qimode);
50098 op1_h = gen_reg_rtx (qimode);
50099 emit_insn (gen_il (op1_l, op1, op1));
50100 emit_insn (gen_ih (op1_h, op1, op1));
50101 full_interleave = qimode == V16QImode;
50102 break;
50104 case ASHIFT:
50105 case LSHIFTRT:
50106 uns_p = true;
50107 /* FALLTHRU */
50108 case ASHIFTRT:
50109 op1_l = gen_reg_rtx (himode);
50110 op1_h = gen_reg_rtx (himode);
50111 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
50112 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
50113 full_interleave = true;
50114 break;
50115 default:
50116 gcc_unreachable ();
50119 /* Perform the operation. */
50120 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
50121 1, OPTAB_DIRECT);
50122 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
50123 1, OPTAB_DIRECT);
50124 gcc_assert (res_l && res_h);
50126 /* Merge the data back into the right place. */
50127 d.target = dest;
50128 d.op0 = gen_lowpart (qimode, res_l);
50129 d.op1 = gen_lowpart (qimode, res_h);
50130 d.vmode = qimode;
50131 d.nelt = GET_MODE_NUNITS (qimode);
50132 d.one_operand_p = false;
50133 d.testing_p = false;
50135 if (full_interleave)
50137 /* For SSE2, we used a full interleave, so the desired
50138 results are in the even elements. */
50139 for (i = 0; i < 64; ++i)
50140 d.perm[i] = i * 2;
50142 else
50144 /* For AVX, the interleave used above was not cross-lane. So the
50145 extraction is evens but with the second and third quarters swapped.
50146 Happily, that is even one insn shorter than even extraction. */
50147 for (i = 0; i < 64; ++i)
50148 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
50151 ok = ix86_expand_vec_perm_const_1 (&d);
50152 gcc_assert (ok);
50154 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50155 gen_rtx_fmt_ee (code, qimode, op1, op2));
50158 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
50159 if op is CONST_VECTOR with all odd elements equal to their
50160 preceding element. */
50162 static bool
50163 const_vector_equal_evenodd_p (rtx op)
50165 machine_mode mode = GET_MODE (op);
50166 int i, nunits = GET_MODE_NUNITS (mode);
50167 if (GET_CODE (op) != CONST_VECTOR
50168 || nunits != CONST_VECTOR_NUNITS (op))
50169 return false;
50170 for (i = 0; i < nunits; i += 2)
50171 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
50172 return false;
50173 return true;
50176 void
50177 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
50178 bool uns_p, bool odd_p)
50180 machine_mode mode = GET_MODE (op1);
50181 machine_mode wmode = GET_MODE (dest);
50182 rtx x;
50183 rtx orig_op1 = op1, orig_op2 = op2;
50185 if (!nonimmediate_operand (op1, mode))
50186 op1 = force_reg (mode, op1);
50187 if (!nonimmediate_operand (op2, mode))
50188 op2 = force_reg (mode, op2);
50190 /* We only play even/odd games with vectors of SImode. */
50191 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
50193 /* If we're looking for the odd results, shift those members down to
50194 the even slots. For some cpus this is faster than a PSHUFD. */
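/* Viewing V4SImode { a0, a1, a2, a3 } as V2DImode, a logical right
   shift by 32 yields { a1, 0, a3, 0 }, i.e. the odd elements moved
   into the even slots.  */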
50195 if (odd_p)
50197 /* For XOP use vpmacsdqh, but only for smult, as it is only
50198 signed. */
50199 if (TARGET_XOP && mode == V4SImode && !uns_p)
50201 x = force_reg (wmode, CONST0_RTX (wmode));
50202 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
50203 return;
50206 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
50207 if (!const_vector_equal_evenodd_p (orig_op1))
50208 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
50209 x, NULL, 1, OPTAB_DIRECT);
50210 if (!const_vector_equal_evenodd_p (orig_op2))
50211 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
50212 x, NULL, 1, OPTAB_DIRECT);
50213 op1 = gen_lowpart (mode, op1);
50214 op2 = gen_lowpart (mode, op2);
50217 if (mode == V16SImode)
50219 if (uns_p)
50220 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
50221 else
50222 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
50224 else if (mode == V8SImode)
50226 if (uns_p)
50227 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
50228 else
50229 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
50231 else if (uns_p)
50232 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
50233 else if (TARGET_SSE4_1)
50234 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
50235 else
50237 rtx s1, s2, t0, t1, t2;
50239 /* The easiest way to implement this without PMULDQ is to go through
50240 the motions as if we are performing a full 64-bit multiply, except
50241 that we need to do less shuffling of the elements. */
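/* Roughly: modulo 2^64, sext(a) * sext(b)
   == zext(a) * zext(b) - (((a < 0 ? b : 0) + (b < 0 ? a : 0)) << 32),
   and the all-ones comparison masks built below let the correction
   term be formed with unsigned widening multiplies as well.  */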
50243 /* Compute the sign-extension, aka highparts, of the two operands. */
50244 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50245 op1, pc_rtx, pc_rtx);
50246 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50247 op2, pc_rtx, pc_rtx);
50249 /* Multiply LO(A) * HI(B), and vice-versa. */
50250 t1 = gen_reg_rtx (wmode);
50251 t2 = gen_reg_rtx (wmode);
50252 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
50253 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
50255 /* Multiply LO(A) * LO(B). */
50256 t0 = gen_reg_rtx (wmode);
50257 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
50259 /* Combine and shift the highparts into place. */
50260 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
50261 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
50262 1, OPTAB_DIRECT);
50264 /* Combine high and low parts. */
50265 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
50266 return;
50268 emit_insn (x);
50271 void
50272 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
50273 bool uns_p, bool high_p)
50275 machine_mode wmode = GET_MODE (dest);
50276 machine_mode mode = GET_MODE (op1);
50277 rtx t1, t2, t3, t4, mask;
50279 switch (mode)
50281 case V4SImode:
50282 t1 = gen_reg_rtx (mode);
50283 t2 = gen_reg_rtx (mode);
50284 if (TARGET_XOP && !uns_p)
50286 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
50287 shuffle the elements once so that all elements are in the right
50288 place for immediate use: { A C B D }. */
50289 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
50290 const1_rtx, GEN_INT (3)));
50291 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
50292 const1_rtx, GEN_INT (3)));
50294 else
50296 /* Put the elements into place for the multiply. */
50297 ix86_expand_vec_interleave (t1, op1, op1, high_p);
50298 ix86_expand_vec_interleave (t2, op2, op2, high_p);
50299 high_p = false;
50301 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
50302 break;
50304 case V8SImode:
50305 /* Shuffle the elements between the lanes. After this we
50306 have { A B E F | C D G H } for each operand. */
50307 t1 = gen_reg_rtx (V4DImode);
50308 t2 = gen_reg_rtx (V4DImode);
50309 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
50310 const0_rtx, const2_rtx,
50311 const1_rtx, GEN_INT (3)));
50312 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
50313 const0_rtx, const2_rtx,
50314 const1_rtx, GEN_INT (3)));
50316 /* Shuffle the elements within the lanes. After this we
50317 have { A A B B | C C D D } or { E E F F | G G H H }. */
50318 t3 = gen_reg_rtx (V8SImode);
50319 t4 = gen_reg_rtx (V8SImode);
50320 mask = GEN_INT (high_p
50321 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
50322 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
50323 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
50324 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
50326 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
50327 break;
50329 case V8HImode:
50330 case V16HImode:
50331 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
50332 uns_p, OPTAB_DIRECT);
50333 t2 = expand_binop (mode,
50334 uns_p ? umul_highpart_optab : smul_highpart_optab,
50335 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
50336 gcc_assert (t1 && t2);
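  /* t1 now holds the low 16 bits and t2 the high 16 bits of each
     product; interleaving them element-wise reassembles the full
     widened products.  */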
50338 t3 = gen_reg_rtx (mode);
50339 ix86_expand_vec_interleave (t3, t1, t2, high_p);
50340 emit_move_insn (dest, gen_lowpart (wmode, t3));
50341 break;
50343 case V16QImode:
50344 case V32QImode:
50345 case V32HImode:
50346 case V16SImode:
50347 case V64QImode:
50348 t1 = gen_reg_rtx (wmode);
50349 t2 = gen_reg_rtx (wmode);
50350 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
50351 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
50353 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
50354 break;
50356 default:
50357 gcc_unreachable ();
50361 void
50362 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
50364 rtx res_1, res_2, res_3, res_4;
50366 res_1 = gen_reg_rtx (V4SImode);
50367 res_2 = gen_reg_rtx (V4SImode);
50368 res_3 = gen_reg_rtx (V2DImode);
50369 res_4 = gen_reg_rtx (V2DImode);
50370 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50371 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50373 /* Move the results in element 2 down to element 1; we don't care
50374 what goes in elements 2 and 3. Then we can merge the parts
50375 back together with an interleave.
50377 Note that two other sequences were tried:
50378 (1) Use interleaves at the start instead of psrldq, which allows
50379 us to use a single shufps to merge things back at the end.
50380 (2) Use shufps here to combine the two vectors, then pshufd to
50381 put the elements in the correct order.
50382 In both cases the cost of the reformatting stall was too high
50383 and the overall sequence slower. */
50385 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50386 const0_rtx, const2_rtx,
50387 const0_rtx, const0_rtx));
50388 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50389 const0_rtx, const2_rtx,
50390 const0_rtx, const0_rtx));
50391 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50393 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
50396 void
50397 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50399 machine_mode mode = GET_MODE (op0);
50400 rtx t1, t2, t3, t4, t5, t6;
50402 if (TARGET_AVX512DQ && mode == V8DImode)
50403 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50404 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50405 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50406 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50407 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50408 else if (TARGET_XOP && mode == V2DImode)
50410 /* op1: A,B,C,D, op2: E,F,G,H */
50411 op1 = gen_lowpart (V4SImode, op1);
50412 op2 = gen_lowpart (V4SImode, op2);
50414 t1 = gen_reg_rtx (V4SImode);
50415 t2 = gen_reg_rtx (V4SImode);
50416 t3 = gen_reg_rtx (V2DImode);
50417 t4 = gen_reg_rtx (V2DImode);
50419 /* t1: B,A,D,C */
50420 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50421 GEN_INT (1),
50422 GEN_INT (0),
50423 GEN_INT (3),
50424 GEN_INT (2)));
50426 /* t2: (B*E),(A*F),(D*G),(C*H) */
50427 emit_insn (gen_mulv4si3 (t2, t1, op2));
50429 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50430 emit_insn (gen_xop_phadddq (t3, t2));
50432 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50433 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50435 /* Multiply lower parts and add all. */
50436 t5 = gen_reg_rtx (V2DImode);
50437 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50438 gen_lowpart (V4SImode, op1),
50439 gen_lowpart (V4SImode, op2)));
50440 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50443 else
50445 machine_mode nmode;
50446 rtx (*umul) (rtx, rtx, rtx);
50448 if (mode == V2DImode)
50450 umul = gen_vec_widen_umult_even_v4si;
50451 nmode = V4SImode;
50453 else if (mode == V4DImode)
50455 umul = gen_vec_widen_umult_even_v8si;
50456 nmode = V8SImode;
50458 else if (mode == V8DImode)
50460 umul = gen_vec_widen_umult_even_v16si;
50461 nmode = V16SImode;
50463 else
50464 gcc_unreachable ();
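/* Modulo 2^64, (hi(a)*2^32 + lo(a)) * (hi(b)*2^32 + lo(b))
   == lo(a)*lo(b) + ((hi(a)*lo(b) + hi(b)*lo(a)) << 32), so three
   widening unsigned multiplies plus shifts and adds suffice.  */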
50467 /* Multiply low parts. */
50468 t1 = gen_reg_rtx (mode);
50469 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50471 /* Shift input vectors right 32 bits so we can multiply high parts. */
50472 t6 = GEN_INT (32);
50473 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50474 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50476 /* Multiply high parts by low parts. */
50477 t4 = gen_reg_rtx (mode);
50478 t5 = gen_reg_rtx (mode);
50479 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50480 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50482 /* Combine and shift the highparts back. */
50483 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50484 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50486 /* Combine high and low parts. */
50487 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50490 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50491 gen_rtx_MULT (mode, op1, op2));
50494 /* Return 1 if control transfer instruction INSN
50495 should be encoded with bnd prefix.
50496 If insn is NULL then return 1 when control
50497 transfer instructions should be prefixed with
50498 bnd by default for the current function. */
50500 bool
50501 ix86_bnd_prefixed_insn_p (rtx insn)
50503 /* For call insns check special flag. */
50504 if (insn && CALL_P (insn))
50506 rtx call = get_call_rtx_from (insn);
50507 if (call)
50508 return CALL_EXPR_WITH_BOUNDS_P (call);
50511 /* All other insns are prefixed only if function is instrumented. */
50512 return chkp_function_instrumented_p (current_function_decl);
50515 /* Calculate integer abs() using only SSE2 instructions. */
50517 void
50518 ix86_expand_sse2_abs (rtx target, rtx input)
50520 machine_mode mode = GET_MODE (target);
50521 rtx tmp0, tmp1, x;
50523 switch (mode)
50525 /* For 32-bit signed integer X, the best way to calculate the absolute
50526 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
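/* E.g. for X = -5: X >> 31 == -1, (-1 ^ -5) == 4, and 4 - (-1) == 5.  */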
50527 case V4SImode:
50528 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50529 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
50530 NULL, 0, OPTAB_DIRECT);
50531 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50532 NULL, 0, OPTAB_DIRECT);
50533 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50534 target, 0, OPTAB_DIRECT);
50535 break;
50537 /* For 16-bit signed integer X, the best way to calculate the absolute
50538 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50539 case V8HImode:
50540 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50542 x = expand_simple_binop (mode, SMAX, tmp0, input,
50543 target, 0, OPTAB_DIRECT);
50544 break;
50546 /* For 8-bit signed integer X, the best way to calculate the absolute
50547 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50548 as SSE2 provides the PMINUB insn. */
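/* This works because whichever of X and -X is negative reads, as an
   unsigned byte, as 256 minus its magnitude, which is never smaller
   than the magnitude itself, so UMIN selects |X|.  */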
50549 case V16QImode:
50550 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50552 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50553 target, 0, OPTAB_DIRECT);
50554 break;
50556 default:
50557 gcc_unreachable ();
50560 if (x != target)
50561 emit_move_insn (target, x);
50564 /* Expand an extract from a vector register through pextr insn.
50565 Return true if successful. */
50567 bool
50568 ix86_expand_pextr (rtx *operands)
50570 rtx dst = operands[0];
50571 rtx src = operands[1];
50573 unsigned int size = INTVAL (operands[2]);
50574 unsigned int pos = INTVAL (operands[3]);
50576 if (SUBREG_P (dst))
50578 /* Reject non-lowpart subregs. */
50579 if (SUBREG_BYTE (dst) > 0)
50580 return false;
50581 dst = SUBREG_REG (dst);
50584 if (SUBREG_P (src))
50586 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
50587 src = SUBREG_REG (src);
50590 switch (GET_MODE (src))
50592 case V16QImode:
50593 case V8HImode:
50594 case V4SImode:
50595 case V2DImode:
50596 case V1TImode:
50597 case TImode:
50599 machine_mode srcmode, dstmode;
50600 rtx d, pat;
50602 dstmode = mode_for_size (size, MODE_INT, 0);
50604 switch (dstmode)
50606 case QImode:
50607 if (!TARGET_SSE4_1)
50608 return false;
50609 srcmode = V16QImode;
50610 break;
50612 case HImode:
50613 if (!TARGET_SSE2)
50614 return false;
50615 srcmode = V8HImode;
50616 break;
50618 case SImode:
50619 if (!TARGET_SSE4_1)
50620 return false;
50621 srcmode = V4SImode;
50622 break;
50624 case DImode:
50625 gcc_assert (TARGET_64BIT);
50626 if (!TARGET_SSE4_1)
50627 return false;
50628 srcmode = V2DImode;
50629 break;
50631 default:
50632 return false;
50635 /* Reject extractions from misaligned positions. */
50636 if (pos & (size - 1))
50637 return false;
50639 if (GET_MODE (dst) == dstmode)
50640 d = dst;
50641 else
50642 d = gen_reg_rtx (dstmode);
50644 /* Construct insn pattern. */
50645 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
50646 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
50648 /* Let the rtl optimizers know about the zero extension performed. */
50649 if (dstmode == QImode || dstmode == HImode)
50651 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
50652 d = gen_lowpart (SImode, d);
50655 emit_insn (gen_rtx_SET (d, pat));
50657 if (d != dst)
50658 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50659 return true;
50662 default:
50663 return false;
50667 /* Expand an insert into a vector register through pinsr insn.
50668 Return true if successful. */
50670 bool
50671 ix86_expand_pinsr (rtx *operands)
50673 rtx dst = operands[0];
50674 rtx src = operands[3];
50676 unsigned int size = INTVAL (operands[1]);
50677 unsigned int pos = INTVAL (operands[2]);
50679 if (SUBREG_P (dst))
50681 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50682 dst = SUBREG_REG (dst);
50685 switch (GET_MODE (dst))
50687 case V16QImode:
50688 case V8HImode:
50689 case V4SImode:
50690 case V2DImode:
50691 case V1TImode:
50692 case TImode:
50694 machine_mode srcmode, dstmode;
50695 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50696 rtx d;
50698 srcmode = mode_for_size (size, MODE_INT, 0);
50700 switch (srcmode)
50702 case QImode:
50703 if (!TARGET_SSE4_1)
50704 return false;
50705 dstmode = V16QImode;
50706 pinsr = gen_sse4_1_pinsrb;
50707 break;
50709 case HImode:
50710 if (!TARGET_SSE2)
50711 return false;
50712 dstmode = V8HImode;
50713 pinsr = gen_sse2_pinsrw;
50714 break;
50716 case SImode:
50717 if (!TARGET_SSE4_1)
50718 return false;
50719 dstmode = V4SImode;
50720 pinsr = gen_sse4_1_pinsrd;
50721 break;
50723 case DImode:
50724 gcc_assert (TARGET_64BIT);
50725 if (!TARGET_SSE4_1)
50726 return false;
50727 dstmode = V2DImode;
50728 pinsr = gen_sse4_1_pinsrq;
50729 break;
50731 default:
50732 return false;
50735 /* Reject insertions to misaligned positions. */
50736 if (pos & (size - 1))
50737 return false;
50739 if (SUBREG_P (src))
50741 unsigned int srcpos = SUBREG_BYTE (src);
50743 if (srcpos > 0)
50745 rtx extr_ops[4];
50747 extr_ops[0] = gen_reg_rtx (srcmode);
50748 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
50749 extr_ops[2] = GEN_INT (size);
50750 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
50752 if (!ix86_expand_pextr (extr_ops))
50753 return false;
50755 src = extr_ops[0];
50757 else
50758 src = gen_lowpart (srcmode, SUBREG_REG (src));
50761 if (GET_MODE (dst) == dstmode)
50762 d = dst;
50763 else
50764 d = gen_reg_rtx (dstmode);
50766 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
50767 gen_lowpart (srcmode, src),
50768 GEN_INT (1 << (pos / size))));
50769 if (d != dst)
50770 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50771 return true;
50774 default:
50775 return false;
50779 /* This function returns the calling-ABI-specific va_list type node
50780 for FNDECL. */
50782 static tree
50783 ix86_fn_abi_va_list (tree fndecl)
50785 if (!TARGET_64BIT)
50786 return va_list_type_node;
50787 gcc_assert (fndecl != NULL_TREE);
50789 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50790 return ms_va_list_type_node;
50791 else
50792 return sysv_va_list_type_node;
50795 /* Returns the canonical va_list type specified by TYPE. If there
50796 is no valid TYPE provided, it returns NULL_TREE. */
50798 static tree
50799 ix86_canonical_va_list_type (tree type)
50801 tree wtype, htype;
50803 /* Resolve references and pointers to va_list type. */
50804 if (TREE_CODE (type) == MEM_REF)
50805 type = TREE_TYPE (type);
50806 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50807 type = TREE_TYPE (type);
50808 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50809 type = TREE_TYPE (type);
50811 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50813 wtype = va_list_type_node;
50814 gcc_assert (wtype != NULL_TREE);
50815 htype = type;
50816 if (TREE_CODE (wtype) == ARRAY_TYPE)
50818 /* If va_list is an array type, the argument may have decayed
50819 to a pointer type, e.g. by being passed to another function.
50820 In that case, unwrap both types so that we can compare the
50821 underlying records. */
50822 if (TREE_CODE (htype) == ARRAY_TYPE
50823 || POINTER_TYPE_P (htype))
50825 wtype = TREE_TYPE (wtype);
50826 htype = TREE_TYPE (htype);
50829 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50830 return va_list_type_node;
50831 wtype = sysv_va_list_type_node;
50832 gcc_assert (wtype != NULL_TREE);
50833 htype = type;
50834 if (TREE_CODE (wtype) == ARRAY_TYPE)
50836 /* If va_list is an array type, the argument may have decayed
50837 to a pointer type, e.g. by being passed to another function.
50838 In that case, unwrap both types so that we can compare the
50839 underlying records. */
50840 if (TREE_CODE (htype) == ARRAY_TYPE
50841 || POINTER_TYPE_P (htype))
50843 wtype = TREE_TYPE (wtype);
50844 htype = TREE_TYPE (htype);
50847 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50848 return sysv_va_list_type_node;
50849 wtype = ms_va_list_type_node;
50850 gcc_assert (wtype != NULL_TREE);
50851 htype = type;
50852 if (TREE_CODE (wtype) == ARRAY_TYPE)
50854 /* If va_list is an array type, the argument may have decayed
50855 to a pointer type, e.g. by being passed to another function.
50856 In that case, unwrap both types so that we can compare the
50857 underlying records. */
50858 if (TREE_CODE (htype) == ARRAY_TYPE
50859 || POINTER_TYPE_P (htype))
50861 wtype = TREE_TYPE (wtype);
50862 htype = TREE_TYPE (htype);
50865 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50866 return ms_va_list_type_node;
50867 return NULL_TREE;
50869 return std_canonical_va_list_type (type);
50872 /* Iterate through the target-specific builtin types for va_list.
50873 IDX denotes the iterator, *PTREE is set to the result type of
50874 the va_list builtin, and *PNAME to its internal type.
50875 Returns zero if there is no element for this index, otherwise
50876 IDX should be increased upon the next call.
50877 Note, do not iterate a base builtin's name like __builtin_va_list.
50878 Used from c_common_nodes_and_builtins. */
50880 static int
50881 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50883 if (TARGET_64BIT)
50885 switch (idx)
50887 default:
50888 break;
50890 case 0:
50891 *ptree = ms_va_list_type_node;
50892 *pname = "__builtin_ms_va_list";
50893 return 1;
50895 case 1:
50896 *ptree = sysv_va_list_type_node;
50897 *pname = "__builtin_sysv_va_list";
50898 return 1;
50902 return 0;
50905 #undef TARGET_SCHED_DISPATCH
50906 #define TARGET_SCHED_DISPATCH has_dispatch
50907 #undef TARGET_SCHED_DISPATCH_DO
50908 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50909 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50910 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50911 #undef TARGET_SCHED_REORDER
50912 #define TARGET_SCHED_REORDER ix86_sched_reorder
50913 #undef TARGET_SCHED_ADJUST_PRIORITY
50914 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50915 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50916 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50917 ix86_dependencies_evaluation_hook
50919 /* The size of the dispatch window is the total number of bytes of
50920 object code allowed in a window. */
50921 #define DISPATCH_WINDOW_SIZE 16
50923 /* Number of dispatch windows considered for scheduling. */
50924 #define MAX_DISPATCH_WINDOWS 3
50926 /* Maximum number of instructions in a window. */
50927 #define MAX_INSN 4
50929 /* Maximum number of immediate operands in a window. */
50930 #define MAX_IMM 4
50932 /* Maximum number of immediate bits allowed in a window. */
50933 #define MAX_IMM_SIZE 128
50935 /* Maximum number of 32 bit immediates allowed in a window. */
50936 #define MAX_IMM_32 4
50938 /* Maximum number of 64 bit immediates allowed in a window. */
50939 #define MAX_IMM_64 2
50941 /* Maximum total of loads or prefetches allowed in a window. */
50942 #define MAX_LOAD 2
50944 /* Maximum total of stores allowed in a window. */
50945 #define MAX_STORE 1
50947 #undef BIG
50948 #define BIG 100
50951 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50952 enum dispatch_group {
50953 disp_no_group = 0,
50954 disp_load,
50955 disp_store,
50956 disp_load_store,
50957 disp_prefetch,
50958 disp_imm,
50959 disp_imm_32,
50960 disp_imm_64,
50961 disp_branch,
50962 disp_cmp,
50963 disp_jcc,
50964 disp_last
50967 /* Number of allowable groups in a dispatch window. It is an array
50968 indexed by dispatch_group enum. 100 is used as a big number,
50969 because the number of these kinds of operations does not have any
50970 effect in a dispatch window, but we need them for other reasons in
50971 the table. */
50972 static unsigned int num_allowable_groups[disp_last] = {
50973 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50976 char group_name[disp_last + 1][16] = {
50977 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50978 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50979 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50982 /* Instruction path. */
50983 enum insn_path {
50984 no_path = 0,
50985 path_single, /* Single micro op. */
50986 path_double, /* Double micro op. */
50987 path_multi, /* Instructions with more than 2 micro ops. */
50988 last_path
50991 /* sched_insn_info defines a window to the instructions scheduled in
50992 the basic block. It contains a pointer to the insn_info table and
50993 the instruction scheduled.
50995 Windows are allocated for each basic block and are linked
50996 together. */
50997 typedef struct sched_insn_info_s {
50998 rtx insn;
50999 enum dispatch_group group;
51000 enum insn_path path;
51001 int byte_len;
51002 int imm_bytes;
51003 } sched_insn_info;
51005 /* Linked list of dispatch windows. This is a two way list of
51006 dispatch windows of a basic block. It contains information about
51007 the number of uops in the window and the total number of
51008 instructions and of bytes in the object code for this dispatch
51009 window. */
51010 typedef struct dispatch_windows_s {
51011 int num_insn; /* Number of insns in the window. */
51012 int num_uops; /* Number of uops in the window. */
51013 int window_size; /* Number of bytes in the window. */
51014 int window_num; /* Window number, either 0 or 1. */
51015 int num_imm; /* Number of immediates in an insn. */
51016 int num_imm_32; /* Number of 32 bit immediates in an insn. */
51017 int num_imm_64; /* Number of 64 bit immediates in an insn. */
51018 int imm_size; /* Total immediates in the window. */
51019 int num_loads; /* Total memory loads in the window. */
51020 int num_stores; /* Total memory stores in the window. */
51021 int violation; /* Violation exists in window. */
51022 sched_insn_info *window; /* Pointer to the window. */
51023 struct dispatch_windows_s *next;
51024 struct dispatch_windows_s *prev;
51025 } dispatch_windows;
51027 /* Immediate values used in an insn. */
51028 typedef struct imm_info_s
51030 int imm;
51031 int imm32;
51032 int imm64;
51033 } imm_info;
51035 static dispatch_windows *dispatch_window_list;
51036 static dispatch_windows *dispatch_window_list1;
51038 /* Get dispatch group of insn. */
51040 static enum dispatch_group
51041 get_mem_group (rtx_insn *insn)
51043 enum attr_memory memory;
51045 if (INSN_CODE (insn) < 0)
51046 return disp_no_group;
51047 memory = get_attr_memory (insn);
51048 if (memory == MEMORY_STORE)
51049 return disp_store;
51051 if (memory == MEMORY_LOAD)
51052 return disp_load;
51054 if (memory == MEMORY_BOTH)
51055 return disp_load_store;
51057 return disp_no_group;
51060 /* Return true if insn is a compare instruction. */
51062 static bool
51063 is_cmp (rtx_insn *insn)
51065 enum attr_type type;
51067 type = get_attr_type (insn);
51068 return (type == TYPE_TEST
51069 || type == TYPE_ICMP
51070 || type == TYPE_FCMP
51071 || GET_CODE (PATTERN (insn)) == COMPARE);
51074 /* Return true if a dispatch violation was encountered. */
51076 static bool
51077 dispatch_violation (void)
51079 if (dispatch_window_list->next)
51080 return dispatch_window_list->next->violation;
51081 return dispatch_window_list->violation;
51084 /* Return true if insn is a branch instruction. */
51086 static bool
51087 is_branch (rtx_insn *insn)
51089 return (CALL_P (insn) || JUMP_P (insn));
51092 /* Return true if insn is a prefetch instruction. */
51094 static bool
51095 is_prefetch (rtx_insn *insn)
51097 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
51100 /* This function initializes a dispatch window and the list container holding a
51101 pointer to the window. */
51103 static void
51104 init_window (int window_num)
51106 int i;
51107 dispatch_windows *new_list;
51109 if (window_num == 0)
51110 new_list = dispatch_window_list;
51111 else
51112 new_list = dispatch_window_list1;
51114 new_list->num_insn = 0;
51115 new_list->num_uops = 0;
51116 new_list->window_size = 0;
51117 new_list->next = NULL;
51118 new_list->prev = NULL;
51119 new_list->window_num = window_num;
51120 new_list->num_imm = 0;
51121 new_list->num_imm_32 = 0;
51122 new_list->num_imm_64 = 0;
51123 new_list->imm_size = 0;
51124 new_list->num_loads = 0;
51125 new_list->num_stores = 0;
51126 new_list->violation = false;
51128 for (i = 0; i < MAX_INSN; i++)
51130 new_list->window[i].insn = NULL;
51131 new_list->window[i].group = disp_no_group;
51132 new_list->window[i].path = no_path;
51133 new_list->window[i].byte_len = 0;
51134 new_list->window[i].imm_bytes = 0;
51136 return;
51139 /* This function allocates and initializes a dispatch window and the
51140 list container holding a pointer to the window. */
51142 static dispatch_windows *
51143 allocate_window (void)
51145 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
51146 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
51148 return new_list;
51151 /* This routine initializes the dispatch scheduling information. It
51152 initiates building dispatch scheduler tables and constructs the
51153 first dispatch window. */
51155 static void
51156 init_dispatch_sched (void)
51158 /* Allocate a dispatch list and a window. */
51159 dispatch_window_list = allocate_window ();
51160 dispatch_window_list1 = allocate_window ();
51161 init_window (0);
51162 init_window (1);
51165 /* This function returns true if a branch is detected. End of a basic block
51166 does not have to be a branch, but here we assume only branches end a
51167 window. */
51169 static bool
51170 is_end_basic_block (enum dispatch_group group)
51172 return group == disp_branch;
51175 /* This function is called when the end of window processing is reached. */
51177 static void
51178 process_end_window (void)
51180 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
51181 if (dispatch_window_list->next)
51183 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
51184 gcc_assert (dispatch_window_list->window_size
51185 + dispatch_window_list1->window_size <= 48);
51186 init_window (1);
51188 init_window (0);
51191 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
51192 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
51193 for 48 bytes of instructions. Note that these windows are not dispatch
51194 windows whose sizes are DISPATCH_WINDOW_SIZE. */
51196 static dispatch_windows *
51197 allocate_next_window (int window_num)
51199 if (window_num == 0)
51201 if (dispatch_window_list->next)
51202 init_window (1);
51203 init_window (0);
51204 return dispatch_window_list;
51207 dispatch_window_list->next = dispatch_window_list1;
51208 dispatch_window_list1->prev = dispatch_window_list;
51210 return dispatch_window_list1;
51213 /* Compute number of immediate operands of an instruction. */
51215 static void
51216 find_constant (rtx in_rtx, imm_info *imm_values)
51218 if (INSN_P (in_rtx))
51219 in_rtx = PATTERN (in_rtx);
51220 subrtx_iterator::array_type array;
51221 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
51222 if (const_rtx x = *iter)
51223 switch (GET_CODE (x))
51225 case CONST:
51226 case SYMBOL_REF:
51227 case CONST_INT:
51228 (imm_values->imm)++;
51229 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
51230 (imm_values->imm32)++;
51231 else
51232 (imm_values->imm64)++;
51233 break;
51235 case CONST_DOUBLE:
51236 case CONST_WIDE_INT:
51237 (imm_values->imm)++;
51238 (imm_values->imm64)++;
51239 break;
51241 case CODE_LABEL:
51242 if (LABEL_KIND (x) == LABEL_NORMAL)
51244 (imm_values->imm)++;
51245 (imm_values->imm32)++;
51247 break;
51249 default:
51250 break;
51254 /* Return the total size of the immediate operands of an instruction along
51255 with the number of corresponding immediate operands. It initializes its
51256 parameters to zero before calling FIND_CONSTANT.
51257 INSN is the input instruction. IMM is the total of immediates.
51258 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
51259 bit immediates. */
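/* E.g. an insn with one 32-bit and one 64-bit immediate sets *IMM = 2,
   *IMM32 = 1, *IMM64 = 1 and returns 4 + 8 = 12 bytes.  */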
51261 static int
51262 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
51264 imm_info imm_values = {0, 0, 0};
51266 find_constant (insn, &imm_values);
51267 *imm = imm_values.imm;
51268 *imm32 = imm_values.imm32;
51269 *imm64 = imm_values.imm64;
51270 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
51273 /* This function indicates whether an instruction has any immediate
51274 operands. */
51276 static bool
51277 has_immediate (rtx_insn *insn)
51279 int num_imm_operand;
51280 int num_imm32_operand;
51281 int num_imm64_operand;
51283 if (insn)
51284 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51285 &num_imm64_operand);
51286 return false;
51289 /* Return single or double path for instructions. */
51291 static enum insn_path
51292 get_insn_path (rtx_insn *insn)
51294 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
51296 if ((int)path == 0)
51297 return path_single;
51299 if ((int)path == 1)
51300 return path_double;
51302 return path_multi;
51305 /* Return insn dispatch group. */
51307 static enum dispatch_group
51308 get_insn_group (rtx_insn *insn)
51310 enum dispatch_group group = get_mem_group (insn);
51311 if (group)
51312 return group;
51314 if (is_branch (insn))
51315 return disp_branch;
51317 if (is_cmp (insn))
51318 return disp_cmp;
51320 if (has_immediate (insn))
51321 return disp_imm;
51323 if (is_prefetch (insn))
51324 return disp_prefetch;
51326 return disp_no_group;
51329 /* Count number of GROUP restricted instructions in a dispatch
51330 window WINDOW_LIST. */
51332 static int
51333 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
51335 enum dispatch_group group = get_insn_group (insn);
51336 int imm_size;
51337 int num_imm_operand;
51338 int num_imm32_operand;
51339 int num_imm64_operand;
51341 if (group == disp_no_group)
51342 return 0;
51344 if (group == disp_imm)
51346 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51347 &num_imm64_operand);
51348 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
51349 || num_imm_operand + window_list->num_imm > MAX_IMM
51350 || (num_imm32_operand > 0
51351 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
51352 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
51353 || (num_imm64_operand > 0
51354 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
51355 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
51356 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
51357 && num_imm64_operand > 0
51358 && ((window_list->num_imm_64 > 0
51359 && window_list->num_insn >= 2)
51360 || window_list->num_insn >= 3)))
51361 return BIG;
51363 return 1;
51366 if ((group == disp_load_store
51367 && (window_list->num_loads >= MAX_LOAD
51368 || window_list->num_stores >= MAX_STORE))
51369 || ((group == disp_load
51370 || group == disp_prefetch)
51371 && window_list->num_loads >= MAX_LOAD)
51372 || (group == disp_store
51373 && window_list->num_stores >= MAX_STORE))
51374 return BIG;
51376 return 1;
51379 /* This function returns true if insn satisfies dispatch rules on the
51380 last window scheduled. */
51382 static bool
51383 fits_dispatch_window (rtx_insn *insn)
51385 dispatch_windows *window_list = dispatch_window_list;
51386 dispatch_windows *window_list_next = dispatch_window_list->next;
51387 unsigned int num_restrict;
51388 enum dispatch_group group = get_insn_group (insn);
51389 enum insn_path path = get_insn_path (insn);
51390 int sum;
51392 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
51393 instructions should be given the lowest priority in the
51394 scheduling process in Haifa scheduler to make sure they will be
51395 scheduled in the same dispatch window as the reference to them. */
51396 if (group == disp_jcc || group == disp_cmp)
51397 return false;
51399 /* Check nonrestricted. */
51400 if (group == disp_no_group || group == disp_branch)
51401 return true;
51403 /* Get last dispatch window. */
51404 if (window_list_next)
51405 window_list = window_list_next;
51407 if (window_list->window_num == 1)
51409 sum = window_list->prev->window_size + window_list->window_size;
51411 if (sum == 32
51412 || (min_insn_size (insn) + sum) >= 48)
51413 /* Window 1 is full. Go for next window. */
51414 return true;
51417 num_restrict = count_num_restricted (insn, window_list);
51419 if (num_restrict > num_allowable_groups[group])
51420 return false;
51422 /* See if it fits in the first window. */
51423 if (window_list->window_num == 0)
51425 /* The first window should have only single and double path
51426 uops. */
51427 if (path == path_double
51428 && (window_list->num_uops + 2) > MAX_INSN)
51429 return false;
51430 else if (path != path_single)
51431 return false;
51433 return true;
51436 /* Add an instruction INSN with NUM_UOPS micro-operations to the
51437 dispatch window WINDOW_LIST. */
51439 static void
51440 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
51442 int byte_len = min_insn_size (insn);
51443 int num_insn = window_list->num_insn;
51444 int imm_size;
51445 sched_insn_info *window = window_list->window;
51446 enum dispatch_group group = get_insn_group (insn);
51447 enum insn_path path = get_insn_path (insn);
51448 int num_imm_operand;
51449 int num_imm32_operand;
51450 int num_imm64_operand;
51452 if (!window_list->violation && group != disp_cmp
51453 && !fits_dispatch_window (insn))
51454 window_list->violation = true;
51456 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51457 &num_imm64_operand);
51459 /* Initialize window with new instruction. */
51460 window[num_insn].insn = insn;
51461 window[num_insn].byte_len = byte_len;
51462 window[num_insn].group = group;
51463 window[num_insn].path = path;
51464 window[num_insn].imm_bytes = imm_size;
51466 window_list->window_size += byte_len;
51467 window_list->num_insn = num_insn + 1;
51468 window_list->num_uops = window_list->num_uops + num_uops;
51469 window_list->imm_size += imm_size;
51470 window_list->num_imm += num_imm_operand;
51471 window_list->num_imm_32 += num_imm32_operand;
51472 window_list->num_imm_64 += num_imm64_operand;
51474 if (group == disp_store)
51475 window_list->num_stores += 1;
51476 else if (group == disp_load
51477 || group == disp_prefetch)
51478 window_list->num_loads += 1;
51479 else if (group == disp_load_store)
51481 window_list->num_stores += 1;
51482 window_list->num_loads += 1;
51486 /* Adds a scheduled instruction, INSN, to the current dispatch window.
51487 If the total bytes of instructions or the number of instructions in
51488 the window exceeds the allowable limit, it allocates a new window. */
51490 static void
51491 add_to_dispatch_window (rtx_insn *insn)
51493 int byte_len;
51494 dispatch_windows *window_list;
51495 dispatch_windows *next_list;
51496 dispatch_windows *window0_list;
51497 enum insn_path path;
51498 enum dispatch_group insn_group;
51499 bool insn_fits;
51500 int num_insn;
51501 int num_uops;
51502 int window_num;
51503 int insn_num_uops;
51504 int sum;
51506 if (INSN_CODE (insn) < 0)
51507 return;
51509 byte_len = min_insn_size (insn);
51510 window_list = dispatch_window_list;
51511 next_list = window_list->next;
51512 path = get_insn_path (insn);
51513 insn_group = get_insn_group (insn);
51515 /* Get the last dispatch window. */
51516 if (next_list)
51517 window_list = dispatch_window_list->next;
51519 if (path == path_single)
51520 insn_num_uops = 1;
51521 else if (path == path_double)
51522 insn_num_uops = 2;
51523 else
51524 insn_num_uops = (int) path;
51526 /* If the current window is full, get a new window.
51527 Window number zero is full if MAX_INSN uops are scheduled in it.
51528 Window number one is full if window zero's bytes plus window
51529 one's bytes equal 32, if adding the bytes of the new instruction
51530 brings the total to 48 or more, or if it already has MAX_INSN
51531 instructions in it. */
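/* E.g. with window zero holding 20 bytes and window one 12, the sum is
   32 and window one is full; with sizes 20 and 10, an 18-byte insn
   would bring the total to 48 and likewise force a new window pair.  */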
51532 num_insn = window_list->num_insn;
51533 num_uops = window_list->num_uops;
51534 window_num = window_list->window_num;
51535 insn_fits = fits_dispatch_window (insn);
51537 if (num_insn >= MAX_INSN
51538 || num_uops + insn_num_uops > MAX_INSN
51539 || !(insn_fits))
51541 window_num = ~window_num & 1;
51542 window_list = allocate_next_window (window_num);
51545 if (window_num == 0)
51547 add_insn_window (insn, window_list, insn_num_uops);
51548 if (window_list->num_insn >= MAX_INSN
51549 && insn_group == disp_branch)
51551 process_end_window ();
51552 return;
51555 else if (window_num == 1)
51557 window0_list = window_list->prev;
51558 sum = window0_list->window_size + window_list->window_size;
51559 if (sum == 32
51560 || (byte_len + sum) >= 48)
51562 process_end_window ();
51563 window_list = dispatch_window_list;
51566 add_insn_window (insn, window_list, insn_num_uops);
51568 else
51569 gcc_unreachable ();
51571 if (is_end_basic_block (insn_group))
51573 /* End of basic block is reached; do end-basic-block processing. */
51574 process_end_window ();
51575 return;
51579 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51581 DEBUG_FUNCTION static void
51582 debug_dispatch_window_file (FILE *file, int window_num)
51584 dispatch_windows *list;
51585 int i;
51587 if (window_num == 0)
51588 list = dispatch_window_list;
51589 else
51590 list = dispatch_window_list1;
51592 fprintf (file, "Window #%d:\n", list->window_num);
51593 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51594 list->num_insn, list->num_uops, list->window_size);
51595 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51596 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51598 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51599 list->num_stores);
51600 fprintf (file, " insn info:\n");
51602 for (i = 0; i < MAX_INSN; i++)
51604 if (!list->window[i].insn)
51605 break;
51606 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51607 i, group_name[list->window[i].group],
51608 i, (void *)list->window[i].insn,
51609 i, list->window[i].path,
51610 i, list->window[i].byte_len,
51611 i, list->window[i].imm_bytes);
51615 /* Print to stdout a dispatch window. */
51617 DEBUG_FUNCTION void
51618 debug_dispatch_window (int window_num)
51620 debug_dispatch_window_file (stdout, window_num);
51623 /* Print INSN dispatch information to FILE. */
51625 DEBUG_FUNCTION static void
51626 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51628 int byte_len;
51629 enum insn_path path;
51630 enum dispatch_group group;
51631 int imm_size;
51632 int num_imm_operand;
51633 int num_imm32_operand;
51634 int num_imm64_operand;
51636 if (INSN_CODE (insn) < 0)
51637 return;
51639 byte_len = min_insn_size (insn);
51640 path = get_insn_path (insn);
51641 group = get_insn_group (insn);
51642 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51643 &num_imm64_operand);
51645 fprintf (file, " insn info:\n");
51646 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51647 group_name[group], path, byte_len);
51648 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51649 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51652 /* Print to stdout the status of the ready list with respect to
51653 dispatch windows. */
51655 DEBUG_FUNCTION void
51656 debug_ready_dispatch (void)
51658 int i;
51659 int no_ready = number_in_ready ();
51661 fprintf (stdout, "Number of ready: %d\n", no_ready);
51663 for (i = 0; i < no_ready; i++)
51664 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51667 /* This routine is the driver of the dispatch scheduler. */
51669 static void
51670 do_dispatch (rtx_insn *insn, int mode)
51672 if (mode == DISPATCH_INIT)
51673 init_dispatch_sched ();
51674 else if (mode == ADD_TO_DISPATCH_WINDOW)
51675 add_to_dispatch_window (insn);
51678 /* Return TRUE if Dispatch Scheduling is supported. */
51680 static bool
51681 has_dispatch (rtx_insn *insn, int action)
51683 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51684 && flag_dispatch_scheduler)
51685 switch (action)
51687 default:
51688 return false;
51690 case IS_DISPATCH_ON:
51691 return true;
51694 case IS_CMP:
51695 return is_cmp (insn);
51697 case DISPATCH_VIOLATION:
51698 return dispatch_violation ();
51700 case FITS_DISPATCH_WINDOW:
51701 return fits_dispatch_window (insn);
51704 return false;
51707 /* Implementation of reassociation_width target hook used by
51708 reassoc phase to identify parallelism level in reassociated
51709 tree. The statement's tree_code is passed in OPC. The argument
51710 type is passed in MODE.
51712 Currently parallel reassociation is enabled for Atom
51713 processors only and we set reassociation width to be 2
51714 because Atom may issue up to 2 instructions per cycle.
51716 Return value should be fixed if parallel reassociation is
51717 enabled for other processors. */
51719 static int
51720 ix86_reassociation_width (unsigned int, machine_mode mode)
51722 /* Vector part. */
51723 if (VECTOR_MODE_P (mode))
51725 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51726 return 2;
51727 else
51728 return 1;
51731 /* Scalar part. */
51732 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51733 return 2;
51734 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51735 return 2;
51736 else
51737 return 1;
51740 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51741 place emms and femms instructions. */
51743 static machine_mode
51744 ix86_preferred_simd_mode (machine_mode mode)
51746 if (!TARGET_SSE)
51747 return word_mode;
51749 switch (mode)
51751 case QImode:
51752 return TARGET_AVX512BW ? V64QImode :
51753 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51754 case HImode:
51755 return TARGET_AVX512BW ? V32HImode :
51756 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51757 case SImode:
51758 return TARGET_AVX512F ? V16SImode :
51759 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51760 case DImode:
51761 return TARGET_AVX512F ? V8DImode :
51762 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51764 case SFmode:
51765 if (TARGET_AVX512F)
51766 return V16SFmode;
51767 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51768 return V8SFmode;
51769 else
51770 return V4SFmode;
51772 case DFmode:
51773 if (!TARGET_VECTORIZE_DOUBLE)
51774 return word_mode;
51775 else if (TARGET_AVX512F)
51776 return V8DFmode;
51777 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51778 return V4DFmode;
51779 else if (TARGET_SSE2)
51780 return V2DFmode;
51781 /* FALLTHRU */
51783 default:
51784 return word_mode;
51788 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51789 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51790 256bit and 128bit vectors. */
51792 static unsigned int
51793 ix86_autovectorize_vector_sizes (void)
51795 return TARGET_AVX512F ? 64 | 32 | 16 :
51796 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51801 /* Return the class of registers which could be used for a pseudo of MODE
51802 and of class RCLASS for spilling instead of memory. Return NO_REGS
51803 if it is not possible or not profitable. */
51804 static reg_class_t
51805 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51807 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51808 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51809 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51810 return ALL_SSE_REGS;
51811 return NO_REGS;
51814 /* Implement targetm.vectorize.init_cost. */
51816 static void *
51817 ix86_init_cost (struct loop *)
51819 unsigned *cost = XNEWVEC (unsigned, 3);
51820 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51821 return cost;
51824 /* Implement targetm.vectorize.add_stmt_cost. */
51826 static unsigned
51827 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51828 struct _stmt_vec_info *stmt_info, int misalign,
51829 enum vect_cost_model_location where)
51831 unsigned *cost = (unsigned *) data;
51832 unsigned retval = 0;
51834 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51835 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51837 /* Statements in an inner loop relative to the loop being
51838 vectorized are weighted more heavily. The value here is
51839 arbitrary and could potentially be improved with analysis. */
51840 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51841 count *= 50; /* FIXME. */
51843 retval = (unsigned) (count * stmt_cost);
51845 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
51846 for Silvermont as it has an out-of-order integer pipeline and can execute
51847 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51848 if (TARGET_SILVERMONT || TARGET_INTEL)
51849 if (stmt_info && stmt_info->stmt)
51851 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51852 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51853 retval = (retval * 17) / 10;
51856 cost[where] += retval;
51858 return retval;
51861 /* Implement targetm.vectorize.finish_cost. */
51863 static void
51864 ix86_finish_cost (void *data, unsigned *prologue_cost,
51865 unsigned *body_cost, unsigned *epilogue_cost)
51867 unsigned *cost = (unsigned *) data;
51868 *prologue_cost = cost[vect_prologue];
51869 *body_cost = cost[vect_body];
51870 *epilogue_cost = cost[vect_epilogue];
51873 /* Implement targetm.vectorize.destroy_cost_data. */
51875 static void
51876 ix86_destroy_cost_data (void *data)
51878 free (data);
51881 /* Validate target specific memory model bits in VAL. */
51883 static unsigned HOST_WIDE_INT
51884 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51886 enum memmodel model = memmodel_from_int (val);
51887 bool strong;
51889 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51890 |MEMMODEL_MASK)
51891 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51893 warning (OPT_Winvalid_memory_model,
51894 "Unknown architecture specific memory model");
51895 return MEMMODEL_SEQ_CST;
51897 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51898 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51900 warning (OPT_Winvalid_memory_model,
51901 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51902 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51904 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51906 warning (OPT_Winvalid_memory_model,
51907 "HLE_RELEASE not used with RELEASE or stronger memory model");
51908 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51910 return val;
51913 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51914 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51915 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51916 or number of vecsize_mangle variants that should be emitted. */
51918 static int
51919 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51920 struct cgraph_simd_clone *clonei,
51921 tree base_type, int num)
51923 int ret = 1;
51925 if (clonei->simdlen
51926 && (clonei->simdlen < 2
51927 || clonei->simdlen > 16
51928 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51930 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51931 "unsupported simdlen %d", clonei->simdlen);
51932 return 0;
51935 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51936 if (TREE_CODE (ret_type) != VOID_TYPE)
51937 switch (TYPE_MODE (ret_type))
51939 case QImode:
51940 case HImode:
51941 case SImode:
51942 case DImode:
51943 case SFmode:
51944 case DFmode:
51945 /* case SCmode: */
51946 /* case DCmode: */
51947 break;
51948 default:
51949 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51950 "unsupported return type %qT for simd\n", ret_type);
51951 return 0;
51954 tree t;
51955 int i;
51957 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51958 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51959 switch (TYPE_MODE (TREE_TYPE (t)))
51961 case QImode:
51962 case HImode:
51963 case SImode:
51964 case DImode:
51965 case SFmode:
51966 case DFmode:
51967 /* case SCmode: */
51968 /* case DCmode: */
51969 break;
51970 default:
51971 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51972 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51973 return 0;
51976 if (clonei->cilk_elemental)
51978 /* Parse the processor clause here. If not present, default to 'b'. */
51979 clonei->vecsize_mangle = 'b';
51981 else if (!TREE_PUBLIC (node->decl))
51983 /* If the function isn't exported, we can pick up just one ISA
51984 for the clones. */
51985 if (TARGET_AVX2)
51986 clonei->vecsize_mangle = 'd';
51987 else if (TARGET_AVX)
51988 clonei->vecsize_mangle = 'c';
51989 else
51990 clonei->vecsize_mangle = 'b';
51991 ret = 1;
51993 else
51995 clonei->vecsize_mangle = "bcd"[num];
51996 ret = 3;
51998 switch (clonei->vecsize_mangle)
52000 case 'b':
52001 clonei->vecsize_int = 128;
52002 clonei->vecsize_float = 128;
52003 break;
52004 case 'c':
52005 clonei->vecsize_int = 128;
52006 clonei->vecsize_float = 256;
52007 break;
52008 case 'd':
52009 clonei->vecsize_int = 256;
52010 clonei->vecsize_float = 256;
52011 break;
52013 if (clonei->simdlen == 0)
52015 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
52016 clonei->simdlen = clonei->vecsize_int;
52017 else
52018 clonei->simdlen = clonei->vecsize_float;
52019 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
52020 if (clonei->simdlen > 16)
52021 clonei->simdlen = 16;
52023 return ret;
52026 /* Add target attribute to SIMD clone NODE if needed. */
52028 static void
52029 ix86_simd_clone_adjust (struct cgraph_node *node)
52031 const char *str = NULL;
52032 gcc_assert (node->decl == cfun->decl);
52033 switch (node->simdclone->vecsize_mangle)
52035 case 'b':
52036 if (!TARGET_SSE2)
52037 str = "sse2";
52038 break;
52039 case 'c':
52040 if (!TARGET_AVX)
52041 str = "avx";
52042 break;
52043 case 'd':
52044 if (!TARGET_AVX2)
52045 str = "avx2";
52046 break;
52047 default:
52048 gcc_unreachable ();
52050 if (str == NULL)
52051 return;
52052 push_cfun (NULL);
52053 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
52054 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
52055 gcc_assert (ok);
52056 pop_cfun ();
52057 ix86_reset_previous_fndecl ();
52058 ix86_set_current_function (node->decl);
52061 /* If SIMD clone NODE can't be used in a vectorized loop
52062 in the current function, return -1, otherwise return the badness of using it
52063 (0 if it is most desirable from the vecsize_mangle point of view, 1
52064 slightly less desirable, etc.). */
52066 static int
52067 ix86_simd_clone_usable (struct cgraph_node *node)
52069 switch (node->simdclone->vecsize_mangle)
52071 case 'b':
52072 if (!TARGET_SSE2)
52073 return -1;
52074 if (!TARGET_AVX)
52075 return 0;
52076 return TARGET_AVX2 ? 2 : 1;
52077 case 'c':
52078 if (!TARGET_AVX)
52079 return -1;
52080 return TARGET_AVX2 ? 1 : 0;
52082 case 'd':
52083 if (!TARGET_AVX2)
52084 return -1;
52085 return 0;
52086 default:
52087 gcc_unreachable ();
52091 /* This function adjusts the unroll factor based on
52092 the hardware capabilities. For example, bdver3 has
52093 a loop buffer which makes unrolling of smaller
52094 loops less important. This function decides the
52095 unroll factor using the number of memory references
52096 (the value 32 is used) as a heuristic. */
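/* E.g. a loop body with 8 counted memory references (references wider
   than four words count twice) gets an unroll factor of 32/8 == 4.  */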
52098 static unsigned
52099 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
52101 basic_block *bbs;
52102 rtx_insn *insn;
52103 unsigned i;
52104 unsigned mem_count = 0;
52106 if (!TARGET_ADJUST_UNROLL)
52107 return nunroll;
52109 /* Count the number of memory references within the loop body.
52110 This value determines the unrolling factor for bdver3 and bdver4
52111 architectures. */
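/* For example, a loop body with 8 memory references (references wider
   than 4 words count twice) gets an unroll factor of 32 / 8 = 4.  */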
52112 subrtx_iterator::array_type array;
52113 bbs = get_loop_body (loop);
52114 for (i = 0; i < loop->num_nodes; i++)
52115 FOR_BB_INSNS (bbs[i], insn)
52116 if (NONDEBUG_INSN_P (insn))
52117 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
52118 if (const_rtx x = *iter)
52119 if (MEM_P (x))
52121 machine_mode mode = GET_MODE (x);
52122 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
52123 if (n_words > 4)
52124 mem_count += 2;
52125 else
52126 mem_count += 1;
52128 free (bbs);
52130 if (mem_count && mem_count <= 32)
52131 return 32 / mem_count;
52133 return nunroll;
52137 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
52139 static bool
52140 ix86_float_exceptions_rounding_supported_p (void)
52142 /* For x87 floating point with standard excess precision handling,
52143 there is no adddf3 pattern (since x87 floating point only has
52144 XFmode operations), so the default hook implementation gets this
52145 wrong. */
52146 return TARGET_80387 || TARGET_SSE_MATH;
52149 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
52151 static void
52152 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
52154 if (!TARGET_80387 && !TARGET_SSE_MATH)
52155 return;
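/* The hook builds three statement sequences: *HOLD saves the FP
   environment (masking exceptions) and clears the exception flags,
   *CLEAR re-clears the exception flags, and *UPDATE captures any
   newly-raised exceptions, restores the saved environment and
   re-raises the captured exceptions via __atomic_feraiseexcept.  */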
52156 tree exceptions_var = create_tmp_var (integer_type_node);
52157 if (TARGET_80387)
52159 tree fenv_index_type = build_index_type (size_int (6));
52160 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
52161 tree fenv_var = create_tmp_var (fenv_type);
52162 mark_addressable (fenv_var);
52163 tree fenv_ptr = build_pointer_type (fenv_type);
52164 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
52165 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
52166 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
52167 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
52168 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
52169 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
52170 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
52171 tree hold_fnclex = build_call_expr (fnclex, 0);
52172 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
52173 hold_fnclex);
52174 *clear = build_call_expr (fnclex, 0);
52175 tree sw_var = create_tmp_var (short_unsigned_type_node);
52176 tree fnstsw_call = build_call_expr (fnstsw, 0);
52177 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
52178 sw_var, fnstsw_call);
52179 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
52180 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
52181 exceptions_var, exceptions_x87);
52182 *update = build2 (COMPOUND_EXPR, integer_type_node,
52183 sw_mod, update_mod);
52184 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
52185 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
52187 if (TARGET_SSE_MATH)
52189 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
52190 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
52191 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
52192 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
52193 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
52194 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
52195 mxcsr_orig_var, stmxcsr_hold_call);
52196 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
52197 mxcsr_orig_var,
52198 build_int_cst (unsigned_type_node, 0x1f80));
52199 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
52200 build_int_cst (unsigned_type_node, 0xffffffc0));
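/* In MXCSR, bits 0-5 are the exception flags and bits 7-12 the
   exception masks: OR-ing in 0x1f80 masks all SSE exceptions, and
   AND-ing with 0xffffffc0 clears the six exception flag bits.  */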
52201 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
52202 mxcsr_mod_var, hold_mod_val);
52203 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52204 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
52205 hold_assign_orig, hold_assign_mod);
52206 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
52207 ldmxcsr_hold_call);
52208 if (*hold)
52209 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
52210 else
52211 *hold = hold_all;
52212 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52213 if (*clear)
52214 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
52215 ldmxcsr_clear_call);
52216 else
52217 *clear = ldmxcsr_clear_call;
52218 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
52219 tree exceptions_sse = fold_convert (integer_type_node,
52220 stmxcsr_update_call);
52221 if (*update)
52223 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
52224 exceptions_var, exceptions_sse);
52225 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
52226 exceptions_var, exceptions_mod);
52227 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
52228 exceptions_assign);
52230 else
52231 *update = build2 (MODIFY_EXPR, integer_type_node,
52232 exceptions_var, exceptions_sse);
52233 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
52234 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52235 ldmxcsr_update_call);
52237 tree atomic_feraiseexcept
52238 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
52239 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
52240 1, exceptions_var);
52241 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52242 atomic_feraiseexcept_call);
52245 /* Return the mode to be used for bounds, or VOIDmode
52246 if bounds are not supported. */
52248 static enum machine_mode
52249 ix86_mpx_bound_mode ()
52251 /* Do not support pointer checker if MPX
52252 is not enabled. */
52253 if (!TARGET_MPX)
52255 if (flag_check_pointer_bounds)
52256 warning (0, "Pointer Checker requires MPX support on this target."
52257 " Use -mmpx options to enable MPX.");
52258 return VOIDmode;
52261 return BNDmode;
52264 /* Return the constant used to statically initialize constant bounds.
52266 This function is used to create special bound values.  For now
52267 only INIT bounds and NONE bounds are expected.  More special
52268 values may be added later. */
52270 static tree
52271 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
52273 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
52274 : build_zero_cst (pointer_sized_int_node);
52275 tree high = ub ? build_zero_cst (pointer_sized_int_node)
52276 : build_minus_one_cst (pointer_sized_int_node);
52278 /* This function is supposed to be used to create INIT and
52279 NONE bounds only. */
52280 gcc_assert ((lb == 0 && ub == -1)
52281 || (lb == -1 && ub == 0));
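/* Hence INIT bounds (lb == 0, ub == -1) become the pair (0, 0) and
   NONE bounds (lb == -1, ub == 0) become (-1, -1): the upper bound is
   stored one's-complemented, matching ix86_initialize_bounds below.  */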
52283 return build_complex (NULL, low, high);
52286 /* Generate into STMTS a list of statements initializing pointer-bounds
52287 variable VAR with bounds LB and UB.  Return the number of generated
52288 statements. */
52290 static int
52291 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
52293 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
52294 tree lhs, modify, var_p;
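/* A bounds value occupies two pointer-sized slots: the lower bound,
   followed by the one's complement of the upper bound.  */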
52296 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
52297 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
52299 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
52300 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
52301 append_to_statement_list (modify, stmts);
52303 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
52304 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
52305 TYPE_SIZE_UNIT (pointer_sized_int_node)));
52306 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
52307 append_to_statement_list (modify, stmts);
52309 return 2;
52312 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
52313 /* For i386, a common symbol is local only in non-PIE binaries.  For
52314 x86-64, a common symbol is local only in non-PIE binaries or when
52315 the linker supports copy relocations in PIE binaries. */
52317 static bool
52318 ix86_binds_local_p (const_tree exp)
52320 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
52321 (!flag_pic
52322 || (TARGET_64BIT
52323 && HAVE_LD_PIE_COPYRELOC != 0)));
52325 #endif
52327 /* If MEM is in the form [base+offset], extract the two parts of the
52328 address into BASE and OFFSET and return true; otherwise return false. */
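/* For example, (plus (reg) (const_int 8)) yields BASE = (reg) and
   OFFSET = 8, while a bare register or SYMBOL_REF yields OFFSET = 0.  */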
52330 static bool
52331 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
52333 rtx addr;
52335 gcc_assert (MEM_P (mem));
52337 addr = XEXP (mem, 0);
52339 if (GET_CODE (addr) == CONST)
52340 addr = XEXP (addr, 0);
52342 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
52344 *base = addr;
52345 *offset = const0_rtx;
52346 return true;
52349 if (GET_CODE (addr) == PLUS
52350 && (REG_P (XEXP (addr, 0))
52351 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
52352 && CONST_INT_P (XEXP (addr, 1)))
52354 *base = XEXP (addr, 0);
52355 *offset = XEXP (addr, 1);
52356 return true;
52359 return false;
52362 /* Given OPERANDS of consecutive load/store instructions, check if we
52363 can merge them into a move-multiple.  LOAD is true if they are loads.
52364 MODE is the mode of the memory operands. */
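/* For example, two loads of MODE size 8 from [base] and [base+8] are
   mergeable; [base] and [base+16] are not, nor are accesses whose
   memory operands use different base registers.  */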
52366 bool
52367 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
52368 enum machine_mode mode)
52370 HOST_WIDE_INT offval_1, offval_2, msize;
52371 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
52373 if (load)
52375 mem_1 = operands[1];
52376 mem_2 = operands[3];
52377 reg_1 = operands[0];
52378 reg_2 = operands[2];
52380 else
52382 mem_1 = operands[0];
52383 mem_2 = operands[2];
52384 reg_1 = operands[1];
52385 reg_2 = operands[3];
52388 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
52390 if (REGNO (reg_1) != REGNO (reg_2))
52391 return false;
52393 /* Check if the addresses are in the form of [base+offset]. */
52394 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
52395 return false;
52396 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
52397 return false;
52399 /* Check if the bases are the same. */
52400 if (!rtx_equal_p (base_1, base_2))
52401 return false;
52403 offval_1 = INTVAL (offset_1);
52404 offval_2 = INTVAL (offset_2);
52405 msize = GET_MODE_SIZE (mode);
52406 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
52407 if (offval_1 + msize != offval_2)
52408 return false;
52410 return true;
52413 /* Initialize the GCC target structure. */
52414 #undef TARGET_RETURN_IN_MEMORY
52415 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
52417 #undef TARGET_LEGITIMIZE_ADDRESS
52418 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
52420 #undef TARGET_ATTRIBUTE_TABLE
52421 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
52422 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
52423 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
52424 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52425 # undef TARGET_MERGE_DECL_ATTRIBUTES
52426 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
52427 #endif
52429 #undef TARGET_COMP_TYPE_ATTRIBUTES
52430 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
52432 #undef TARGET_INIT_BUILTINS
52433 #define TARGET_INIT_BUILTINS ix86_init_builtins
52434 #undef TARGET_BUILTIN_DECL
52435 #define TARGET_BUILTIN_DECL ix86_builtin_decl
52436 #undef TARGET_EXPAND_BUILTIN
52437 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
52439 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
52440 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
52441 ix86_builtin_vectorized_function
52443 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
52444 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
52446 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
52447 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
52449 #undef TARGET_VECTORIZE_BUILTIN_GATHER
52450 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
52452 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
52453 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
52455 #undef TARGET_BUILTIN_RECIPROCAL
52456 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
52458 #undef TARGET_ASM_FUNCTION_EPILOGUE
52459 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
52461 #undef TARGET_ENCODE_SECTION_INFO
52462 #ifndef SUBTARGET_ENCODE_SECTION_INFO
52463 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
52464 #else
52465 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
52466 #endif
52468 #undef TARGET_ASM_OPEN_PAREN
52469 #define TARGET_ASM_OPEN_PAREN ""
52470 #undef TARGET_ASM_CLOSE_PAREN
52471 #define TARGET_ASM_CLOSE_PAREN ""
52473 #undef TARGET_ASM_BYTE_OP
52474 #define TARGET_ASM_BYTE_OP ASM_BYTE
52476 #undef TARGET_ASM_ALIGNED_HI_OP
52477 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
52478 #undef TARGET_ASM_ALIGNED_SI_OP
52479 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
52480 #ifdef ASM_QUAD
52481 #undef TARGET_ASM_ALIGNED_DI_OP
52482 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
52483 #endif
52485 #undef TARGET_PROFILE_BEFORE_PROLOGUE
52486 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
52488 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
52489 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
52491 #undef TARGET_ASM_UNALIGNED_HI_OP
52492 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
52493 #undef TARGET_ASM_UNALIGNED_SI_OP
52494 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
52495 #undef TARGET_ASM_UNALIGNED_DI_OP
52496 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
52498 #undef TARGET_PRINT_OPERAND
52499 #define TARGET_PRINT_OPERAND ix86_print_operand
52500 #undef TARGET_PRINT_OPERAND_ADDRESS
52501 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
52502 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
52503 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
52504 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
52505 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
52507 #undef TARGET_SCHED_INIT_GLOBAL
52508 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
52509 #undef TARGET_SCHED_ADJUST_COST
52510 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
52511 #undef TARGET_SCHED_ISSUE_RATE
52512 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
52513 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52514 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
52515 ia32_multipass_dfa_lookahead
52516 #undef TARGET_SCHED_MACRO_FUSION_P
52517 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
52518 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
52519 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
52521 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
52522 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
52524 #undef TARGET_MEMMODEL_CHECK
52525 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
52527 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
52528 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
52530 #ifdef HAVE_AS_TLS
52531 #undef TARGET_HAVE_TLS
52532 #define TARGET_HAVE_TLS true
52533 #endif
52534 #undef TARGET_CANNOT_FORCE_CONST_MEM
52535 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
52536 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
52537 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
52539 #undef TARGET_DELEGITIMIZE_ADDRESS
52540 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
52542 #undef TARGET_MS_BITFIELD_LAYOUT_P
52543 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
52545 #if TARGET_MACHO
52546 #undef TARGET_BINDS_LOCAL_P
52547 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
52548 #else
52549 #undef TARGET_BINDS_LOCAL_P
52550 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
52551 #endif
52552 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52553 #undef TARGET_BINDS_LOCAL_P
52554 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
52555 #endif
52557 #undef TARGET_ASM_OUTPUT_MI_THUNK
52558 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
52559 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
52560 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
52562 #undef TARGET_ASM_FILE_START
52563 #define TARGET_ASM_FILE_START x86_file_start
52565 #undef TARGET_OPTION_OVERRIDE
52566 #define TARGET_OPTION_OVERRIDE ix86_option_override
52568 #undef TARGET_REGISTER_MOVE_COST
52569 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
52570 #undef TARGET_MEMORY_MOVE_COST
52571 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
52572 #undef TARGET_RTX_COSTS
52573 #define TARGET_RTX_COSTS ix86_rtx_costs
52574 #undef TARGET_ADDRESS_COST
52575 #define TARGET_ADDRESS_COST ix86_address_cost
52577 #undef TARGET_FIXED_CONDITION_CODE_REGS
52578 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
52579 #undef TARGET_CC_MODES_COMPATIBLE
52580 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
52582 #undef TARGET_MACHINE_DEPENDENT_REORG
52583 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
52585 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
52586 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
52588 #undef TARGET_BUILD_BUILTIN_VA_LIST
52589 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
52591 #undef TARGET_FOLD_BUILTIN
52592 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
52594 #undef TARGET_COMPARE_VERSION_PRIORITY
52595 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
52597 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
52598 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
52599 ix86_generate_version_dispatcher_body
52601 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
52602 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
52603 ix86_get_function_versions_dispatcher
52605 #undef TARGET_ENUM_VA_LIST_P
52606 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
52608 #undef TARGET_FN_ABI_VA_LIST
52609 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
52611 #undef TARGET_CANONICAL_VA_LIST_TYPE
52612 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
52614 #undef TARGET_EXPAND_BUILTIN_VA_START
52615 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
52617 #undef TARGET_MD_ASM_ADJUST
52618 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
52620 #undef TARGET_PROMOTE_PROTOTYPES
52621 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
52622 #undef TARGET_SETUP_INCOMING_VARARGS
52623 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52624 #undef TARGET_MUST_PASS_IN_STACK
52625 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52626 #undef TARGET_FUNCTION_ARG_ADVANCE
52627 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52628 #undef TARGET_FUNCTION_ARG
52629 #define TARGET_FUNCTION_ARG ix86_function_arg
52630 #undef TARGET_INIT_PIC_REG
52631 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52632 #undef TARGET_USE_PSEUDO_PIC_REG
52633 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52634 #undef TARGET_FUNCTION_ARG_BOUNDARY
52635 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52636 #undef TARGET_PASS_BY_REFERENCE
52637 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52638 #undef TARGET_INTERNAL_ARG_POINTER
52639 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52640 #undef TARGET_UPDATE_STACK_BOUNDARY
52641 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52642 #undef TARGET_GET_DRAP_RTX
52643 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52644 #undef TARGET_STRICT_ARGUMENT_NAMING
52645 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52646 #undef TARGET_STATIC_CHAIN
52647 #define TARGET_STATIC_CHAIN ix86_static_chain
52648 #undef TARGET_TRAMPOLINE_INIT
52649 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52650 #undef TARGET_RETURN_POPS_ARGS
52651 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52653 #undef TARGET_LEGITIMATE_COMBINED_INSN
52654 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52656 #undef TARGET_ASAN_SHADOW_OFFSET
52657 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52659 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52660 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
52662 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52663 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52665 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52666 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52668 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52669 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52670 ix86_libgcc_floating_mode_supported_p
52672 #undef TARGET_C_MODE_FOR_SUFFIX
52673 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52675 #ifdef HAVE_AS_TLS
52676 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52677 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52678 #endif
52680 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52681 #undef TARGET_INSERT_ATTRIBUTES
52682 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52683 #endif
52685 #undef TARGET_MANGLE_TYPE
52686 #define TARGET_MANGLE_TYPE ix86_mangle_type
52688 #if !TARGET_MACHO
52689 #undef TARGET_STACK_PROTECT_FAIL
52690 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
52691 #endif
52693 #undef TARGET_FUNCTION_VALUE
52694 #define TARGET_FUNCTION_VALUE ix86_function_value
52696 #undef TARGET_FUNCTION_VALUE_REGNO_P
52697 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52699 #undef TARGET_PROMOTE_FUNCTION_MODE
52700 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52702 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
52703 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
52705 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52706 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52708 #undef TARGET_INSTANTIATE_DECLS
52709 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
52711 #undef TARGET_SECONDARY_RELOAD
52712 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52714 #undef TARGET_CLASS_MAX_NREGS
52715 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52717 #undef TARGET_PREFERRED_RELOAD_CLASS
52718 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52719 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52720 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52721 #undef TARGET_CLASS_LIKELY_SPILLED_P
52722 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
52724 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52725 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52726 ix86_builtin_vectorization_cost
52727 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52728 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52729 ix86_vectorize_vec_perm_const_ok
52730 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52731 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52732 ix86_preferred_simd_mode
52733 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52734 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52735 ix86_autovectorize_vector_sizes
52736 #undef TARGET_VECTORIZE_INIT_COST
52737 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52738 #undef TARGET_VECTORIZE_ADD_STMT_COST
52739 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52740 #undef TARGET_VECTORIZE_FINISH_COST
52741 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52742 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52743 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
52745 #undef TARGET_SET_CURRENT_FUNCTION
52746 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52748 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52749 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52751 #undef TARGET_OPTION_SAVE
52752 #define TARGET_OPTION_SAVE ix86_function_specific_save
52754 #undef TARGET_OPTION_RESTORE
52755 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52757 #undef TARGET_OPTION_POST_STREAM_IN
52758 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52760 #undef TARGET_OPTION_PRINT
52761 #define TARGET_OPTION_PRINT ix86_function_specific_print
52763 #undef TARGET_OPTION_FUNCTION_VERSIONS
52764 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52766 #undef TARGET_CAN_INLINE_P
52767 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52769 #undef TARGET_EXPAND_TO_RTL_HOOK
52770 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
52772 #undef TARGET_LEGITIMATE_ADDRESS_P
52773 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52775 #undef TARGET_LRA_P
52776 #define TARGET_LRA_P hook_bool_void_true
52778 #undef TARGET_REGISTER_PRIORITY
52779 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52781 #undef TARGET_REGISTER_USAGE_LEVELING_P
52782 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52784 #undef TARGET_LEGITIMATE_CONSTANT_P
52785 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52787 #undef TARGET_FRAME_POINTER_REQUIRED
52788 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52790 #undef TARGET_CAN_ELIMINATE
52791 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52793 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52794 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52796 #undef TARGET_ASM_CODE_END
52797 #define TARGET_ASM_CODE_END ix86_code_end
52799 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52800 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52802 #if TARGET_MACHO
52803 #undef TARGET_INIT_LIBFUNCS
52804 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52805 #endif
52807 #undef TARGET_LOOP_UNROLL_ADJUST
52808 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52810 #undef TARGET_SPILL_CLASS
52811 #define TARGET_SPILL_CLASS ix86_spill_class
52813 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52814 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52815 ix86_simd_clone_compute_vecsize_and_simdlen
52817 #undef TARGET_SIMD_CLONE_ADJUST
52818 #define TARGET_SIMD_CLONE_ADJUST \
52819 ix86_simd_clone_adjust
52821 #undef TARGET_SIMD_CLONE_USABLE
52822 #define TARGET_SIMD_CLONE_USABLE \
52823 ix86_simd_clone_usable
52825 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52826 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52827 ix86_float_exceptions_rounding_supported_p
52829 #undef TARGET_MODE_EMIT
52830 #define TARGET_MODE_EMIT ix86_emit_mode_set
52832 #undef TARGET_MODE_NEEDED
52833 #define TARGET_MODE_NEEDED ix86_mode_needed
52835 #undef TARGET_MODE_AFTER
52836 #define TARGET_MODE_AFTER ix86_mode_after
52838 #undef TARGET_MODE_ENTRY
52839 #define TARGET_MODE_ENTRY ix86_mode_entry
52841 #undef TARGET_MODE_EXIT
52842 #define TARGET_MODE_EXIT ix86_mode_exit
52844 #undef TARGET_MODE_PRIORITY
52845 #define TARGET_MODE_PRIORITY ix86_mode_priority
52847 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52848 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52850 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52851 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52853 #undef TARGET_STORE_BOUNDS_FOR_ARG
52854 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52856 #undef TARGET_LOAD_RETURNED_BOUNDS
52857 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52859 #undef TARGET_STORE_RETURNED_BOUNDS
52860 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52862 #undef TARGET_CHKP_BOUND_MODE
52863 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52865 #undef TARGET_BUILTIN_CHKP_FUNCTION
52866 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52868 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52869 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52871 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52872 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52874 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52875 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52877 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52878 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52880 #undef TARGET_OFFLOAD_OPTIONS
52881 #define TARGET_OFFLOAD_OPTIONS \
52882 ix86_offload_options
52884 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52885 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52887 struct gcc_target targetm = TARGET_INITIALIZER;
52889 #include "gt-i386.h"